OpenDAS / dgl / Commits / 6ac701f8

Commit 6ac701f8, authored Sep 13, 2024 by sangwzh
Parent: 1547bd93

    update src and graphbolt code

Changes: 116 · Showing 20 changed files with 89 additions and 56 deletions (+89 -56)
Changed files (this page):

  graphbolt/src/unique_and_compact.cc      +4  -3
  src/array/arith.h                        +4  -3
  src/array/array.cc                       +5  -4
  src/array/array_arith.cc                 +3  -2
  src/array/cpu/array_cumsum.cc            +1  -0
  src/array/cpu/array_sort.cc              +1  -1
  src/array/cpu/gather_mm.cc               +2  -1
  src/array/cpu/labor_sampling.cc          +2  -1
  src/array/cpu/rowwise_sampling.cc        +2  -1
  src/array/cpu/rowwise_topk.cc            +2  -1
  src/array/cpu/sddmm.cc                   +2  -1
  src/array/cpu/segment_reduce.cc          +3  -2
  src/array/cpu/spmm.cc                    +2  -1
  src/array/cpu/traversal.cc               +2  -1
  src/array/cuda/array_cumsum.hip          +9  -6
  src/array/cuda/array_index_select.cuh    +2  -0
  src/array/cuda/array_index_select.hip    +9  -6
  src/array/cuda/array_nonzero.hip         +10 -6
  src/array/cuda/array_op_impl.hip         +16 -12
  src/array/cuda/array_scatter.hip         +8  -4
graphbolt/src/unique_and_compact.cc  (+4 -3)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2023 by Contributors
  *
 ...
@@ -10,9 +11,9 @@
 #include <unordered_map>
-#include "./concurrent_id_hash_map.h"
+#include "concurrent_id_hash_map.h"
-#include "./macro.h"
+#include "macro.h"
-#include "./utils.h"
+#include "utils.h"
 namespace graphbolt {
 namespace sampling {
 ...
src/array/arith.h  (+4 -3)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/arith.h
 ...
@@ -6,13 +7,13 @@
 #ifndef DGL_ARRAY_ARITH_H_
 #define DGL_ARRAY_ARITH_H_
-#ifdef __CUDACC__
+#ifdef __HIPCC__
-#define DGLDEVICE __device__
+#define DGLDEVICE __device__ __host__
 #define DGLINLINE __forceinline__
 #else
 #define DGLDEVICE
 #define DGLINLINE inline
-#endif  // __CUDACC__
+#endif  // __HIPCC__
 namespace dgl {
 namespace aten {
 ...
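The arith.h hunk above swaps the compile guard from __CUDACC__ to __HIPCC__ and widens DGLDEVICE to __device__ __host__, so functions marked with it are callable from both device and host code when hipcc compiles the translation unit. Below is a minimal sketch of how those macros resolve; the Add functor and main() are illustrative additions, not code from this commit.

// Sketch: the macro block as it reads after this commit, plus a hypothetical user.
#ifdef __HIPCC__                          // defined when hipcc compiles the file
#include <hip/hip_runtime.h>
#define DGLDEVICE __device__ __host__     // usable in both device and host code
#define DGLINLINE __forceinline__
#else                                     // plain host compiler: no GPU qualifiers
#define DGLDEVICE
#define DGLINLINE inline
#endif  // __HIPCC__

struct Add {  // hypothetical functor, only to show where the macros land
  template <typename T>
  static DGLDEVICE DGLINLINE T Call(T a, T b) { return a + b; }
};

int main() { return Add::Call(2, 3) == 5 ? 0 : 1; }  // compiles with or without hipcc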
src/array/array.cc  (+5 -4)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019-2022 by Contributors
  * @file array/array.cc
 ...
@@ -14,9 +15,9 @@
 #include <sstream>
 #include "../c_api_common.h"
-#include "./arith.h"
+#include "arith.h"
-#include "./array_op.h"
+#include "array_op.h"
-#include "./kernel_decl.h"
+#include "kernel_decl.h"
 using namespace dgl::runtime;
 ...
@@ -585,7 +586,7 @@ COOMatrix CSRRowWiseSampling(
     // prob_or_mask is pinned and rows on GPU is valid
     CHECK_VALID_CONTEXT(prob_or_mask, rows);
   ATEN_CSR_SWITCH_CUDA_UVA(mat, rows, XPU, IdType, "CSRRowWiseSampling", {
-    CHECK(!(prob_or_mask->dtype.bits == 8 && XPU == kDGLCUDA))
+    CHECK(!(prob_or_mask->dtype.bits == 8 && (XPU == kDGLCUDA || XPU == kDGLROCM)))
         << "GPU sampling with masks is currently not supported yet.";
     ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH(
         prob_or_mask->dtype, FloatType, "probability or mask", {
 ...
src/array/array_arith.cc  (+3 -2)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/array_aritch.cc
 ...
@@ -8,8 +9,8 @@
 #include <dgl/runtime/ndarray.h>
 #include "../c_api_common.h"
-#include "./arith.h"
+#include "arith.h"
-#include "./array_op.h"
+#include "array_op.h"
 using namespace dgl::runtime;
 ...
src/array/cpu/array_cumsum.cc  (+1 -0)

@@ -29,6 +29,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
   IdType* out_d = ret.Ptr<IdType>();
   out_d[0] = in_d[0];
   for (int64_t i = 1; i < len; ++i) out_d[i] = out_d[i - 1] + in_d[i];
+  std::cout << "limm cpu ret : " << ret << std::endl;
   return ret;
 }
 }
 ...
src/array/cpu/array_sort.cc  (+1 -1)

@@ -48,7 +48,7 @@ void swap(const PairRef<V1, V2>& r1, const PairRef<V1, V2>& r2) {
 }
 template <typename V1, typename V2>
-struct PairIterator
+__host__ struct PairIterator
     : public std::iterator<
           std::random_access_iterator_tag, std::pair<V1, V2>, std::ptrdiff_t,
           std::pair<V1*, V2*>, PairRef<V1, V2>> {
 ...
src/array/cpu/gather_mm.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file kernel/cpu/gaher_mm.cc
  * @brief GatherMM C APIs and definitions.
  */
-#include "./gather_mm.h"
+#include "gather_mm.h"
 #include <dgl/array.h>
 ...
src/array/cpu/labor_sampling.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /*!
  * Copyright (c) 2022, NVIDIA Corporation
  * Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
 ...
@@ -18,7 +19,7 @@
  * \file array/cuda/labor_sampling.cc
  * \brief labor sampling
  */
-#include "./labor_pick.h"
+#include "labor_pick.h"
 namespace dgl {
 namespace aten {
 ...
src/array/cpu/rowwise_sampling.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/rowwise_sampling.cc
 ...
@@ -7,7 +8,7 @@
 #include <numeric>
-#include "./rowwise_pick.h"
+#include "rowwise_pick.h"
 namespace dgl {
 namespace aten {
 ...
src/array/cpu/rowwise_topk.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/rowwise_topk.cc
 ...
@@ -6,7 +7,7 @@
 #include <algorithm>
 #include <numeric>
-#include "./rowwise_pick.h"
+#include "rowwise_pick.h"
 namespace dgl {
 namespace aten {
 ...
src/array/cpu/sddmm.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file aten/cpu/sddmm.cc
  * @brief SDDMM C APIs and definitions.
  */
-#include "./sddmm.h"
+#include "sddmm.h"
 #include <dgl/array.h>
 ...
src/array/cpu/segment_reduce.cc  (+3 -2)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file kernel/cpu/segment_reduce.cc
  * @brief Segment reduce C APIs and definitions.
  */
-#include "./segment_reduce.h"
+#include "segment_reduce.h"
 #include <dgl/array.h>
 #include <string>
-#include "./spmm_binary_ops.h"
+#include "spmm_binary_ops.h"
 namespace dgl {
 namespace aten {
 ...
src/array/cpu/spmm.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file kernel/cpu/spmm.cc
  * @brief SPMM C APIs and definitions.
  */
-#include "./spmm.h"
+#include "spmm.h"
 #include <dgl/array.h>
 ...
src/array/cpu/traversal.cc  (+2 -1)

+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/traversal.cc
  * @brief Graph traversal implementation
  */
-#include "./traversal.h"
+#include "traversal.h"
 #include <dgl/graph_traversal.h>
 ...
src/array/cuda/array_cumsum.cu → src/array/cuda/array_cumsum.hip  (+9 -6)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/array_cumsum.cu
  * @brief Array cumsum GPU implementation
  */
 #include <dgl/array.h>
+#include "../../../include/dgl/array.h"
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include "../../runtime/cuda/cuda_common.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
 ...
@@ -23,7 +26,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
           : aten::Full(0, 1, array->dtype.bits, array->ctx);
   auto device = runtime::DeviceAPI::Get(array->ctx);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   const IdType* in_d = array.Ptr<IdType>();
   IdArray ret;
   IdType* out_d = nullptr;
 ...
@@ -36,16 +39,16 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
   }
   // Allocate workspace
   size_t workspace_size = 0;
-  CUDA_CALL(cub::DeviceScan::InclusiveSum(
+  CUDA_CALL(hipcub::DeviceScan::InclusiveSum(
       nullptr, workspace_size, in_d, out_d, len, stream));
   void* workspace = device->AllocWorkspace(array->ctx, workspace_size);
   // Compute cumsum
-  CUDA_CALL(cub::DeviceScan::InclusiveSum(
+  CUDA_CALL(hipcub::DeviceScan::InclusiveSum(
       workspace, workspace_size, in_d, out_d, len, stream));
   device->FreeWorkspace(array->ctx, workspace);
+  std::cout << "cuda ret : " << ret << std::endl;
   return ret;
 }
 ...
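The cub → hipcub swap in array_cumsum.hip keeps CUB's two-call convention: the first InclusiveSum call passes a null workspace pointer only to learn how many temporary bytes are needed, and the second call performs the scan. A self-contained sketch of that convention with plain HIP allocations follows; it assumes a ROCm toolchain with hipcub installed and is not DGL code.

#include <hip/hip_runtime.h>
#include <hipcub/hipcub.hpp>

#include <cstdio>

int main() {
  const int n = 4;
  const int h_in[n] = {1, 2, 3, 4};
  int *d_in = nullptr, *d_out = nullptr;
  hipMalloc(&d_in, n * sizeof(int));
  hipMalloc(&d_out, n * sizeof(int));
  hipMemcpy(d_in, h_in, n * sizeof(int), hipMemcpyHostToDevice);

  // Call 1: null workspace, hipcub only reports the bytes it needs.
  size_t workspace_size = 0;
  hipcub::DeviceScan::InclusiveSum(nullptr, workspace_size, d_in, d_out, n);
  void* workspace = nullptr;
  hipMalloc(&workspace, workspace_size);

  // Call 2: run the inclusive prefix sum (default stream here).
  hipcub::DeviceScan::InclusiveSum(workspace, workspace_size, d_in, d_out, n);

  int h_out[n] = {0, 0, 0, 0};
  hipMemcpy(h_out, d_out, n * sizeof(int), hipMemcpyDeviceToHost);
  std::printf("%d %d %d %d\n", h_out[0], h_out[1], h_out[2], h_out[3]);  // 1 3 6 10

  hipFree(workspace);
  hipFree(d_out);
  hipFree(d_in);
  return 0;
}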
src/array/cuda/array_index_select.cuh  (+2 -0)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2021-2022 by Contributors
  * @file array/cuda/array_index_select.cuh
 ...
src/array/cuda/array_index_select.cu → src/array/cuda/array_index_select.hip  (+9 -6)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/cpu/array_index_select.cu
  * @brief Array index select GPU implementation
  */
 #include <dgl/array.h>
+#include "../../../include/dgl/array.h"
 #include "../../runtime/cuda/cuda_common.h"
-#include "./array_index_select.cuh"
+#include "array_index_select.cuh"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
 ...
@@ -33,7 +36,7 @@ NDArray IndexSelect(NDArray array, IdArray index) {
   const DType* array_data = static_cast<DType*>(cuda::GetDevicePointer(array));
   const IdType* idx_data = static_cast<IdType*>(index->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   if (num_feat == 1) {
     const int nt = cuda::FindNumThreads(len);
     const int nb = (len + nt - 1) / nt;
 ...
@@ -61,9 +64,9 @@ template NDArray IndexSelect<kDGLCUDA, int64_t, int64_t>(NDArray, IdArray);
 template NDArray IndexSelect<kDGLCUDA, __half, int32_t>(NDArray, IdArray);
 template NDArray IndexSelect<kDGLCUDA, __half, int64_t>(NDArray, IdArray);
 #if BF16_ENABLED
-template NDArray IndexSelect<kDGLCUDA, __nv_bfloat16, int32_t>(
+template NDArray IndexSelect<kDGLCUDA, __hip_bfloat16, int32_t>(
     NDArray, IdArray);
-template NDArray IndexSelect<kDGLCUDA, __nv_bfloat16, int64_t>(
+template NDArray IndexSelect<kDGLCUDA, __hip_bfloat16, int64_t>(
     NDArray, IdArray);
 #endif  // BF16_ENABLED
 template NDArray IndexSelect<kDGLCUDA, float, int32_t>(NDArray, IdArray);
 ...
@@ -87,7 +90,7 @@ template uint32_t IndexSelect<kDGLCUDA, uint32_t>(NDArray array, int64_t index);
 template uint64_t IndexSelect<kDGLCUDA, uint64_t>(NDArray array, int64_t index);
 template __half IndexSelect<kDGLCUDA, __half>(NDArray array, int64_t index);
 #if BF16_ENABLED
-template __nv_bfloat16 IndexSelect<kDGLCUDA, __nv_bfloat16>(
+template __hip_bfloat16 IndexSelect<kDGLCUDA, __hip_bfloat16>(
     NDArray array, int64_t index);
 #endif  // BF16_ENABLED
 template float IndexSelect<kDGLCUDA, float>(NDArray array, int64_t index);
 ...
src/array/cuda/array_nonzero.cu → src/array/cuda/array_nonzero.hip  (+10 -6)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2020 by Contributors
  * @file array/cpu/array_nonzero.cc
 ...
@@ -5,11 +7,13 @@
  */
 #include <dgl/array.h>
+#include "../../../include/dgl/array.h"
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include "../../runtime/cuda/cuda_common.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
 ...
@@ -33,24 +37,24 @@ IdArray NonZero(IdArray array) {
   const int64_t len = array->shape[0];
   IdArray ret = NewIdArray(len, ctx, 64);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   const IdType* const in_data = static_cast<const IdType*>(array->data);
   int64_t* const out_data = static_cast<int64_t*>(ret->data);
   IsNonZeroIndex<IdType> comp(in_data);
-  cub::CountingInputIterator<int64_t> counter(0);
+  hipcub::CountingInputIterator<int64_t> counter(0);
   // room for cub to output on GPU
   int64_t* d_num_nonzeros =
       static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
   size_t temp_size = 0;
-  CUDA_CALL(cub::DeviceSelect::If(
+  CUDA_CALL(hipcub::DeviceSelect::If(
       nullptr, temp_size, counter, out_data, d_num_nonzeros, len, comp,
       stream));
   void* temp = device->AllocWorkspace(ctx, temp_size);
-  CUDA_CALL(cub::DeviceSelect::If(
+  CUDA_CALL(hipcub::DeviceSelect::If(
      temp, temp_size, counter, out_data, d_num_nonzeros, len, comp, stream));
   device->FreeWorkspace(ctx, temp);
 ...
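array_nonzero.hip uses the same query-then-run convention with hipcub::DeviceSelect::If: a CountingInputIterator feeds candidate indices 0..len-1 to a predicate, the indices that pass are compacted into the output buffer, and the count lands in a device-side integer. A hedged standalone sketch follows (ROCm and hipcub assumed); the IsNonZero predicate here is a simplified stand-in for DGL's IsNonZeroIndex.

#include <hip/hip_runtime.h>
#include <hipcub/hipcub.hpp>

#include <cstdio>

// Simplified stand-in for DGL's IsNonZeroIndex: keep index i when data[i] != 0.
struct IsNonZero {
  const int* data;
  __host__ __device__ bool operator()(int64_t i) const { return data[i] != 0; }
};

int main() {
  const int len = 6;
  const int h_in[len] = {0, 3, 0, 7, 0, 1};
  int* d_in = nullptr;
  int64_t* d_out = nullptr;
  int64_t* d_num = nullptr;
  hipMalloc(&d_in, len * sizeof(int));
  hipMalloc(&d_out, len * sizeof(int64_t));
  hipMalloc(&d_num, sizeof(int64_t));
  hipMemcpy(d_in, h_in, len * sizeof(int), hipMemcpyHostToDevice);

  IsNonZero comp{d_in};
  hipcub::CountingInputIterator<int64_t> counter(0);  // yields 0, 1, 2, ...

  // Size query with a null workspace, then the actual stream compaction.
  size_t temp_size = 0;
  hipcub::DeviceSelect::If(nullptr, temp_size, counter, d_out, d_num, len, comp);
  void* temp = nullptr;
  hipMalloc(&temp, temp_size);
  hipcub::DeviceSelect::If(temp, temp_size, counter, d_out, d_num, len, comp);

  int64_t num = 0;
  hipMemcpy(&num, d_num, sizeof(int64_t), hipMemcpyDeviceToHost);
  std::printf("nonzero count: %lld\n", static_cast<long long>(num));  // 3 (indices 1, 3, 5)

  hipFree(temp);
  hipFree(d_num);
  hipFree(d_out);
  hipFree(d_in);
  return 0;
}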
src/array/cuda/array_op_impl.cu → src/array/cuda/array_op_impl.hip  (+16 -12)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2020-2021 by Contributors
  * @file array/cuda/array_op_impl.cu
  * @brief Array operator GPU implementation
  */
 #include <dgl/array.h>
+#include "../../../include/dgl/array.h"
 #include "../../runtime/cuda/cuda_common.h"
 #include "../../runtime/cuda/cuda_hashtable.cuh"
 #include "../arith.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
 ...
@@ -36,7 +40,7 @@ IdArray BinaryElewise(IdArray lhs, IdArray rhs) {
   const IdType* lhs_data = static_cast<IdType*>(lhs->data);
   const IdType* rhs_data = static_cast<IdType*>(rhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
 ...
@@ -107,7 +111,7 @@ IdArray BinaryElewise(IdArray lhs, IdType rhs) {
   IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
   const IdType* lhs_data = static_cast<IdType*>(lhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
 ...
@@ -178,7 +182,7 @@ IdArray BinaryElewise(IdType lhs, IdArray rhs) {
   IdArray ret = NewIdArray(rhs->shape[0], rhs->ctx, rhs->dtype.bits);
   const IdType* rhs_data = static_cast<IdType*>(rhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
 ...
@@ -249,7 +253,7 @@ IdArray UnaryElewise(IdArray lhs) {
   IdArray ret = NewIdArray(lhs->shape[0], lhs->ctx, lhs->dtype.bits);
   const IdType* lhs_data = static_cast<IdType*>(lhs->data);
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(len);
   int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(
 ...
@@ -277,7 +281,7 @@ template <DGLDeviceType XPU, typename DType>
 NDArray Full(DType val, int64_t length, DGLContext ctx) {
   NDArray ret = NDArray::Empty({length}, DGLDataTypeTraits<DType>::dtype, ctx);
   DType* ret_data = static_cast<DType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(length);
   int nb = (length + nt - 1) / nt;
   CUDA_KERNEL_CALL(
 ...
@@ -292,8 +296,8 @@ template IdArray Full<kDGLCUDA, int64_t>(
 template IdArray Full<kDGLCUDA, __half>(
     __half val, int64_t length, DGLContext ctx);
 #if BF16_ENABLED
-template IdArray Full<kDGLCUDA, __nv_bfloat16>(
+template IdArray Full<kDGLCUDA, __hip_bfloat16>(
-    __nv_bfloat16 val, int64_t length, DGLContext ctx);
+    __hip_bfloat16 val, int64_t length, DGLContext ctx);
 #endif  // BF16_ENABLED
 template IdArray Full<kDGLCUDA, float>(
     float val, int64_t length, DGLContext ctx);
 ...
@@ -319,7 +323,7 @@ IdArray Range(IdType low, IdType high, DGLContext ctx) {
   IdArray ret = NewIdArray(length, ctx, sizeof(IdType) * 8);
   if (length == 0) return ret;
   IdType* ret_data = static_cast<IdType*>(ret->data);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(length);
   int nb = (length + nt - 1) / nt;
   CUDA_KERNEL_CALL(
 ...
@@ -355,7 +359,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
   const auto& ctx = arrays[0]->ctx;
   auto device = runtime::DeviceAPI::Get(ctx);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   // build node maps and get the induced nodes
   OrderedHashTable<IdType> node_map(total_length, ctx, stream);
 ...
@@ -364,7 +368,7 @@ IdArray Relabel_(const std::vector<IdArray>& arrays) {
       static_cast<int64_t*>(device->AllocWorkspace(ctx, sizeof(int64_t)));
   IdArray induced_nodes = NewIdArray(total_length, ctx, sizeof(IdType) * 8);
-  CUDA_CALL(cudaMemsetAsync(
+  CUDA_CALL(hipMemsetAsync(
       num_induced_device, 0, sizeof(*num_induced_device), stream));
   node_map.FillWithDuplicates(
 ...
@@ -416,7 +420,7 @@ IdArray AsNumBits(IdArray arr, uint8_t bits) {
   const std::vector<int64_t> shape(arr->shape, arr->shape + arr->ndim);
   IdArray ret = IdArray::Empty(shape, DGLDataType{kDGLInt, bits, 1}, arr->ctx);
   const int64_t length = ret.NumElements();
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   int nt = cuda::FindNumThreads(length);
   int nb = (length + nt - 1) / nt;
   if (bits == 32) {
 ...
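Each GPU entry point in array_op_impl.hip now takes its stream from runtime::getCurrentHIPStreamMasqueradingAsCUDA() and forwards it to CUDA_KERNEL_CALL. Outside of DGL's wrappers, the underlying pattern is an ordinary HIP stream plus hipLaunchKernelGGL, sketched below with an illustrative kernel; FillKernel and the launch geometry are assumptions made for the example, not code from this commit.

#include <hip/hip_runtime.h>

#include <cstdio>

// Illustrative kernel: write a constant, one element per thread.
__global__ void FillKernel(int* out, int value, int length) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < length) out[i] = value;
}

int main() {
  const int length = 1024;
  int* d_out = nullptr;
  hipMalloc(&d_out, length * sizeof(int));

  hipStream_t stream;      // DGL obtains this from its runtime helper instead
  hipStreamCreate(&stream);

  const int nt = 256;                     // threads per block
  const int nb = (length + nt - 1) / nt;  // blocks, rounded up
  hipLaunchKernelGGL(FillKernel, dim3(nb), dim3(nt), 0, stream, d_out, 7, length);
  hipStreamSynchronize(stream);

  int first = 0;
  hipMemcpy(&first, d_out, sizeof(int), hipMemcpyDeviceToHost);
  std::printf("first element: %d\n", first);  // 7

  hipStreamDestroy(stream);
  hipFree(d_out);
  return 0;
}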
src/array/cuda/array_scatter.cu → src/array/cuda/array_scatter.hip  (+8 -4)

+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
  * Copyright (c) 2019 by Contributors
  * @file array/cuda/array_scatter.cu
  * @brief Array scatter GPU implementation
  */
 #include <dgl/array.h>
+#include "../../../include/dgl/array.h"
 #include "../../runtime/cuda/cuda_common.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
 ...
@@ -31,7 +35,7 @@ void Scatter_(IdArray index, NDArray value, NDArray out) {
   const DType* val = value.Ptr<DType>();
   DType* outd = out.Ptr<DType>();
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
   const int nt = cuda::FindNumThreads(len);
   const int nb = (len + nt - 1) / nt;
   CUDA_KERNEL_CALL(_ScatterKernel, nb, nt, 0, stream, idx, val, len, outd);
 ...
@@ -41,7 +45,7 @@ template void Scatter_<kDGLCUDA, int32_t, int32_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, int64_t, int32_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, __half, int32_t>(IdArray, NDArray, NDArray);
 #if BF16_ENABLED
-template void Scatter_<kDGLCUDA, __nv_bfloat16, int32_t>(
+template void Scatter_<kDGLCUDA, __hip_bfloat16, int32_t>(
     IdArray, NDArray, NDArray);
 #endif  // BF16_ENABLED
 template void Scatter_<kDGLCUDA, float, int32_t>(IdArray, NDArray, NDArray);
 ...
@@ -50,7 +54,7 @@ template void Scatter_<kDGLCUDA, int32_t, int64_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, int64_t, int64_t>(IdArray, NDArray, NDArray);
 template void Scatter_<kDGLCUDA, __half, int64_t>(IdArray, NDArray, NDArray);
 #if BF16_ENABLED
-template void Scatter_<kDGLCUDA, __nv_bfloat16, int64_t>(
+template void Scatter_<kDGLCUDA, __hip_bfloat16, int64_t>(
     IdArray, NDArray, NDArray);
 #endif  // BF16_ENABLED
 template void Scatter_<kDGLCUDA, float, int64_t>(IdArray, NDArray, NDArray);
 ...