Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
aaaecbc9
Commit
aaaecbc9
authored
May 12, 2023
by
lisj
Browse files
处理kDLGPU为kDLROCM
parent
c454d419
Changes
54
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
76 additions
and
76 deletions
+76
-76
src/graph/sampling/randomwalks/randomwalk_gpu.cu
src/graph/sampling/randomwalks/randomwalk_gpu.cu
+11
-11
src/graph/sampling/randomwalks/randomwalks.cc
src/graph/sampling/randomwalks/randomwalks.cc
+1
-1
src/graph/transform/cuda/cuda_compact_graph.cu
src/graph/transform/cuda/cuda_compact_graph.cu
+4
-4
src/graph/transform/cuda/cuda_to_block.cu
src/graph/transform/cuda/cuda_to_block.cu
+6
-6
src/graph/transform/cuda/knn.cu
src/graph/transform/cuda/knn.cu
+8
-8
src/graph/transform/to_bipartite.cc
src/graph/transform/to_bipartite.cc
+2
-2
src/graph/unit_graph.h
src/graph/unit_graph.h
+1
-1
src/partition/cuda/partition_op.cu
src/partition/cuda/partition_op.cu
+18
-18
src/partition/ndarray_partition.cc
src/partition/ndarray_partition.cc
+13
-13
src/random/random.cc
src/random/random.cc
+1
-1
src/runtime/c_runtime_api.cc
src/runtime/c_runtime_api.cc
+1
-1
src/runtime/cuda/cuda_device_api.cc
src/runtime/cuda/cuda_device_api.cc
+4
-4
src/runtime/cuda/nccl_api.cu
src/runtime/cuda/nccl_api.cu
+2
-2
src/runtime/ndarray.cc
src/runtime/ndarray.cc
+4
-4
No files found.
src/graph/sampling/randomwalks/randomwalk_gpu.cu
View file @
aaaecbc9
...
@@ -219,7 +219,7 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
...
@@ -219,7 +219,7 @@ std::pair<IdArray, IdArray> RandomWalkUniform(
dim3
grid
((
num_seeds
+
TILE_SIZE
-
1
)
/
TILE_SIZE
);
dim3
grid
((
num_seeds
+
TILE_SIZE
-
1
)
/
TILE_SIZE
);
const
uint64_t
random_seed
=
RandomEngine
::
ThreadLocal
()
->
RandInt
(
1000000000
);
const
uint64_t
random_seed
=
RandomEngine
::
ThreadLocal
()
->
RandInt
(
1000000000
);
ATEN_FLOAT_TYPE_SWITCH
(
restart_prob
->
dtype
,
FloatType
,
"random walk GPU kernel"
,
{
ATEN_FLOAT_TYPE_SWITCH
(
restart_prob
->
dtype
,
FloatType
,
"random walk GPU kernel"
,
{
CHECK
(
restart_prob
->
ctx
.
device_type
==
kDL
GPU
)
<<
"restart prob should be in GPU."
;
CHECK
(
restart_prob
->
ctx
.
device_type
==
kDL
ROCM
)
<<
"restart prob should be in GPU."
;
CHECK
(
restart_prob
->
ndim
==
1
)
<<
"restart prob dimension should be 1."
;
CHECK
(
restart_prob
->
ndim
==
1
)
<<
"restart prob dimension should be 1."
;
const
FloatType
*
restart_prob_data
=
restart_prob
.
Ptr
<
FloatType
>
();
const
FloatType
*
restart_prob_data
=
restart_prob
.
Ptr
<
FloatType
>
();
const
int64_t
restart_prob_size
=
restart_prob
->
shape
[
0
];
const
int64_t
restart_prob_size
=
restart_prob
->
shape
[
0
];
...
@@ -350,7 +350,7 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
...
@@ -350,7 +350,7 @@ std::pair<IdArray, IdArray> RandomWalkBiased(
dim3
block
(
256
);
dim3
block
(
256
);
dim3
grid
((
num_seeds
+
TILE_SIZE
-
1
)
/
TILE_SIZE
);
dim3
grid
((
num_seeds
+
TILE_SIZE
-
1
)
/
TILE_SIZE
);
const
uint64_t
random_seed
=
RandomEngine
::
ThreadLocal
()
->
RandInt
(
1000000000
);
const
uint64_t
random_seed
=
RandomEngine
::
ThreadLocal
()
->
RandInt
(
1000000000
);
CHECK
(
restart_prob
->
ctx
.
device_type
==
kDL
GPU
)
<<
"restart prob should be in GPU."
;
CHECK
(
restart_prob
->
ctx
.
device_type
==
kDL
ROCM
)
<<
"restart prob should be in GPU."
;
CHECK
(
restart_prob
->
ndim
==
1
)
<<
"restart prob dimension should be 1."
;
CHECK
(
restart_prob
->
ndim
==
1
)
<<
"restart prob dimension should be 1."
;
const
FloatType
*
restart_prob_data
=
restart_prob
.
Ptr
<
FloatType
>
();
const
FloatType
*
restart_prob_data
=
restart_prob
.
Ptr
<
FloatType
>
();
const
int64_t
restart_prob_size
=
restart_prob
->
shape
[
0
];
const
int64_t
restart_prob_size
=
restart_prob
->
shape
[
0
];
...
@@ -480,7 +480,7 @@ std::tuple<IdArray, IdArray, IdArray> SelectPinSageNeighbors(
...
@@ -480,7 +480,7 @@ std::tuple<IdArray, IdArray, IdArray> SelectPinSageNeighbors(
const
IdArray
dst
,
const
IdArray
dst
,
const
int64_t
num_samples_per_node
,
const
int64_t
num_samples_per_node
,
const
int64_t
k
)
{
const
int64_t
k
)
{
CHECK
(
src
->
ctx
.
device_type
==
kDL
GPU
)
<<
CHECK
(
src
->
ctx
.
device_type
==
kDL
ROCM
)
<<
"IdArray needs be on GPU!"
;
"IdArray needs be on GPU!"
;
const
IdxType
*
src_data
=
src
.
Ptr
<
IdxType
>
();
const
IdxType
*
src_data
=
src
.
Ptr
<
IdxType
>
();
const
IdxType
*
dst_data
=
dst
.
Ptr
<
IdxType
>
();
const
IdxType
*
dst_data
=
dst
.
Ptr
<
IdxType
>
();
...
@@ -496,27 +496,27 @@ std::tuple<IdArray, IdArray, IdArray> SelectPinSageNeighbors(
...
@@ -496,27 +496,27 @@ std::tuple<IdArray, IdArray, IdArray> SelectPinSageNeighbors(
}
}
template
template
std
::
pair
<
IdArray
,
IdArray
>
RandomWalk
<
kDL
GPU
,
int32_t
>
(
std
::
pair
<
IdArray
,
IdArray
>
RandomWalk
<
kDL
ROCM
,
int32_t
>
(
const
HeteroGraphPtr
hg
,
const
HeteroGraphPtr
hg
,
const
IdArray
seeds
,
const
IdArray
seeds
,
const
TypeArray
metapath
,
const
TypeArray
metapath
,
const
std
::
vector
<
FloatArray
>
&
prob
);
const
std
::
vector
<
FloatArray
>
&
prob
);
template
template
std
::
pair
<
IdArray
,
IdArray
>
RandomWalk
<
kDL
GPU
,
int64_t
>
(
std
::
pair
<
IdArray
,
IdArray
>
RandomWalk
<
kDL
ROCM
,
int64_t
>
(
const
HeteroGraphPtr
hg
,
const
HeteroGraphPtr
hg
,
const
IdArray
seeds
,
const
IdArray
seeds
,
const
TypeArray
metapath
,
const
TypeArray
metapath
,
const
std
::
vector
<
FloatArray
>
&
prob
);
const
std
::
vector
<
FloatArray
>
&
prob
);
template
template
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithRestart
<
kDL
GPU
,
int32_t
>
(
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithRestart
<
kDL
ROCM
,
int32_t
>
(
const
HeteroGraphPtr
hg
,
const
HeteroGraphPtr
hg
,
const
IdArray
seeds
,
const
IdArray
seeds
,
const
TypeArray
metapath
,
const
TypeArray
metapath
,
const
std
::
vector
<
FloatArray
>
&
prob
,
const
std
::
vector
<
FloatArray
>
&
prob
,
double
restart_prob
);
double
restart_prob
);
template
template
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithRestart
<
kDL
GPU
,
int64_t
>
(
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithRestart
<
kDL
ROCM
,
int64_t
>
(
const
HeteroGraphPtr
hg
,
const
HeteroGraphPtr
hg
,
const
IdArray
seeds
,
const
IdArray
seeds
,
const
TypeArray
metapath
,
const
TypeArray
metapath
,
...
@@ -524,14 +524,14 @@ std::pair<IdArray, IdArray> RandomWalkWithRestart<kDLGPU, int64_t>(
...
@@ -524,14 +524,14 @@ std::pair<IdArray, IdArray> RandomWalkWithRestart<kDLGPU, int64_t>(
double
restart_prob
);
double
restart_prob
);
template
template
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithStepwiseRestart
<
kDL
GPU
,
int32_t
>
(
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithStepwiseRestart
<
kDL
ROCM
,
int32_t
>
(
const
HeteroGraphPtr
hg
,
const
HeteroGraphPtr
hg
,
const
IdArray
seeds
,
const
IdArray
seeds
,
const
TypeArray
metapath
,
const
TypeArray
metapath
,
const
std
::
vector
<
FloatArray
>
&
prob
,
const
std
::
vector
<
FloatArray
>
&
prob
,
FloatArray
restart_prob
);
FloatArray
restart_prob
);
template
template
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithStepwiseRestart
<
kDL
GPU
,
int64_t
>
(
std
::
pair
<
IdArray
,
IdArray
>
RandomWalkWithStepwiseRestart
<
kDL
ROCM
,
int64_t
>
(
const
HeteroGraphPtr
hg
,
const
HeteroGraphPtr
hg
,
const
IdArray
seeds
,
const
IdArray
seeds
,
const
TypeArray
metapath
,
const
TypeArray
metapath
,
...
@@ -539,13 +539,13 @@ std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart<kDLGPU, int64_t>(
...
@@ -539,13 +539,13 @@ std::pair<IdArray, IdArray> RandomWalkWithStepwiseRestart<kDLGPU, int64_t>(
FloatArray
restart_prob
);
FloatArray
restart_prob
);
template
template
std
::
tuple
<
IdArray
,
IdArray
,
IdArray
>
SelectPinSageNeighbors
<
kDL
GPU
,
int32_t
>
(
std
::
tuple
<
IdArray
,
IdArray
,
IdArray
>
SelectPinSageNeighbors
<
kDL
ROCM
,
int32_t
>
(
const
IdArray
src
,
const
IdArray
src
,
const
IdArray
dst
,
const
IdArray
dst
,
const
int64_t
num_samples_per_node
,
const
int64_t
num_samples_per_node
,
const
int64_t
k
);
const
int64_t
k
);
template
template
std
::
tuple
<
IdArray
,
IdArray
,
IdArray
>
SelectPinSageNeighbors
<
kDL
GPU
,
int64_t
>
(
std
::
tuple
<
IdArray
,
IdArray
,
IdArray
>
SelectPinSageNeighbors
<
kDL
ROCM
,
int64_t
>
(
const
IdArray
src
,
const
IdArray
src
,
const
IdArray
dst
,
const
IdArray
dst
,
const
int64_t
num_samples_per_node
,
const
int64_t
num_samples_per_node
,
...
...
src/graph/sampling/randomwalks/randomwalks.cc
View file @
aaaecbc9
...
@@ -36,7 +36,7 @@ void CheckRandomWalkInputs(
...
@@ -36,7 +36,7 @@ void CheckRandomWalkInputs(
// CHECK_SAME_CONTEXT(seeds, metapath);
// CHECK_SAME_CONTEXT(seeds, metapath);
if
(
hg
->
IsPinned
())
{
if
(
hg
->
IsPinned
())
{
CHECK_EQ
(
seeds
->
ctx
.
device_type
,
kDL
GPU
)
<<
"Expected seeds ("
<<
seeds
->
ctx
<<
")"
\
CHECK_EQ
(
seeds
->
ctx
.
device_type
,
kDL
ROCM
)
<<
"Expected seeds ("
<<
seeds
->
ctx
<<
")"
\
<<
" to be on the GPU when the graph is pinned."
;
<<
" to be on the GPU when the graph is pinned."
;
}
else
if
(
hg
->
Context
()
!=
seeds
->
ctx
)
{
}
else
if
(
hg
->
Context
()
!=
seeds
->
ctx
)
{
LOG
(
FATAL
)
<<
"Expected seeds ("
<<
seeds
->
ctx
<<
")"
<<
" to have the same "
\
LOG
(
FATAL
)
<<
"Expected seeds ("
<<
seeds
->
ctx
<<
")"
<<
" to have the same "
\
...
...
src/graph/transform/cuda/cuda_compact_graph.cu
View file @
aaaecbc9
...
@@ -70,7 +70,7 @@ void BuildNodeMaps(
...
@@ -70,7 +70,7 @@ void BuildNodeMaps(
for
(
int64_t
ntype
=
0
;
ntype
<
num_ntypes
;
++
ntype
)
{
for
(
int64_t
ntype
=
0
;
ntype
<
num_ntypes
;
++
ntype
)
{
const
IdArray
&
nodes
=
input_nodes
[
ntype
];
const
IdArray
&
nodes
=
input_nodes
[
ntype
];
if
(
nodes
->
shape
[
0
]
>
0
)
{
if
(
nodes
->
shape
[
0
]
>
0
)
{
CHECK_EQ
(
nodes
->
ctx
.
device_type
,
kDL
GPU
);
CHECK_EQ
(
nodes
->
ctx
.
device_type
,
kDL
ROCM
);
node_maps
->
LhsHashTable
(
ntype
).
FillWithDuplicates
(
node_maps
->
LhsHashTable
(
ntype
).
FillWithDuplicates
(
nodes
.
Ptr
<
IdType
>
(),
nodes
.
Ptr
<
IdType
>
(),
nodes
->
shape
[
0
],
nodes
->
shape
[
0
],
...
@@ -92,7 +92,7 @@ CompactGraphsGPU(
...
@@ -92,7 +92,7 @@ CompactGraphsGPU(
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
hipStream_t
stream
=
runtime
::
getCurrentCUDAStream
();
hipStream_t
stream
=
runtime
::
getCurrentCUDAStream
();
CHECK_EQ
(
ctx
.
device_type
,
kDL
GPU
);
CHECK_EQ
(
ctx
.
device_type
,
kDL
ROCM
);
// Step 1: Collect the nodes that has connections for each type.
// Step 1: Collect the nodes that has connections for each type.
const
uint64_t
num_ntypes
=
graphs
[
0
]
->
NumVertexTypes
();
const
uint64_t
num_ntypes
=
graphs
[
0
]
->
NumVertexTypes
();
...
@@ -255,7 +255,7 @@ CompactGraphsGPU(
...
@@ -255,7 +255,7 @@ CompactGraphsGPU(
template
<
>
template
<
>
std
::
pair
<
std
::
vector
<
HeteroGraphPtr
>
,
std
::
vector
<
IdArray
>>
std
::
pair
<
std
::
vector
<
HeteroGraphPtr
>
,
std
::
vector
<
IdArray
>>
CompactGraphs
<
kDL
GPU
,
int32_t
>
(
CompactGraphs
<
kDL
ROCM
,
int32_t
>
(
const
std
::
vector
<
HeteroGraphPtr
>
&
graphs
,
const
std
::
vector
<
HeteroGraphPtr
>
&
graphs
,
const
std
::
vector
<
IdArray
>
&
always_preserve
)
{
const
std
::
vector
<
IdArray
>
&
always_preserve
)
{
return
CompactGraphsGPU
<
int32_t
>
(
graphs
,
always_preserve
);
return
CompactGraphsGPU
<
int32_t
>
(
graphs
,
always_preserve
);
...
@@ -263,7 +263,7 @@ CompactGraphs<kDLGPU, int32_t>(
...
@@ -263,7 +263,7 @@ CompactGraphs<kDLGPU, int32_t>(
template
<
>
template
<
>
std
::
pair
<
std
::
vector
<
HeteroGraphPtr
>
,
std
::
vector
<
IdArray
>>
std
::
pair
<
std
::
vector
<
HeteroGraphPtr
>
,
std
::
vector
<
IdArray
>>
CompactGraphs
<
kDL
GPU
,
int64_t
>
(
CompactGraphs
<
kDL
ROCM
,
int64_t
>
(
const
std
::
vector
<
HeteroGraphPtr
>
&
graphs
,
const
std
::
vector
<
HeteroGraphPtr
>
&
graphs
,
const
std
::
vector
<
IdArray
>
&
always_preserve
)
{
const
std
::
vector
<
IdArray
>
&
always_preserve
)
{
return
CompactGraphsGPU
<
int64_t
>
(
graphs
,
always_preserve
);
return
CompactGraphsGPU
<
int64_t
>
(
graphs
,
always_preserve
);
...
...
src/graph/transform/cuda/cuda_to_block.cu
View file @
aaaecbc9
...
@@ -82,7 +82,7 @@ class DeviceNodeMapMaker {
...
@@ -82,7 +82,7 @@ class DeviceNodeMapMaker {
for
(
int64_t
ntype
=
0
;
ntype
<
lhs_num_ntypes
;
++
ntype
)
{
for
(
int64_t
ntype
=
0
;
ntype
<
lhs_num_ntypes
;
++
ntype
)
{
const
IdArray
&
nodes
=
lhs_nodes
[
ntype
];
const
IdArray
&
nodes
=
lhs_nodes
[
ntype
];
if
(
nodes
->
shape
[
0
]
>
0
)
{
if
(
nodes
->
shape
[
0
]
>
0
)
{
CHECK_EQ
(
nodes
->
ctx
.
device_type
,
kDL
GPU
);
CHECK_EQ
(
nodes
->
ctx
.
device_type
,
kDL
ROCM
);
node_maps
->
LhsHashTable
(
ntype
).
FillWithDuplicates
(
node_maps
->
LhsHashTable
(
ntype
).
FillWithDuplicates
(
nodes
.
Ptr
<
IdType
>
(),
nodes
.
Ptr
<
IdType
>
(),
nodes
->
shape
[
0
],
nodes
->
shape
[
0
],
...
@@ -127,7 +127,7 @@ class DeviceNodeMapMaker {
...
@@ -127,7 +127,7 @@ class DeviceNodeMapMaker {
for
(
int64_t
ntype
=
0
;
ntype
<
lhs_num_ntypes
;
++
ntype
)
{
for
(
int64_t
ntype
=
0
;
ntype
<
lhs_num_ntypes
;
++
ntype
)
{
const
IdArray
&
nodes
=
lhs_nodes
[
ntype
];
const
IdArray
&
nodes
=
lhs_nodes
[
ntype
];
if
(
nodes
->
shape
[
0
]
>
0
)
{
if
(
nodes
->
shape
[
0
]
>
0
)
{
CHECK_EQ
(
nodes
->
ctx
.
device_type
,
kDL
GPU
);
CHECK_EQ
(
nodes
->
ctx
.
device_type
,
kDL
ROCM
);
node_maps
->
LhsHashTable
(
ntype
).
FillWithUnique
(
node_maps
->
LhsHashTable
(
ntype
).
FillWithUnique
(
nodes
.
Ptr
<
IdType
>
(),
nodes
.
Ptr
<
IdType
>
(),
nodes
->
shape
[
0
],
nodes
->
shape
[
0
],
...
@@ -154,7 +154,7 @@ class DeviceNodeMapMaker {
...
@@ -154,7 +154,7 @@ class DeviceNodeMapMaker {
// Since partial specialization is not allowed for functions, use this as an
// Since partial specialization is not allowed for functions, use this as an
// intermediate for ToBlock where XPU = kDL
GPU
.
// intermediate for ToBlock where XPU = kDL
ROCM
.
template
<
typename
IdType
>
template
<
typename
IdType
>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
ToBlockGPU
(
ToBlockGPU
(
...
@@ -170,7 +170,7 @@ ToBlockGPU(
...
@@ -170,7 +170,7 @@ ToBlockGPU(
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
hipStream_t
stream
=
runtime
::
getCurrentCUDAStream
();
hipStream_t
stream
=
runtime
::
getCurrentCUDAStream
();
CHECK_EQ
(
ctx
.
device_type
,
kDL
GPU
);
CHECK_EQ
(
ctx
.
device_type
,
kDL
ROCM
);
for
(
const
auto
&
nodes
:
rhs_nodes
)
{
for
(
const
auto
&
nodes
:
rhs_nodes
)
{
CHECK_EQ
(
ctx
.
device_type
,
nodes
->
ctx
.
device_type
);
CHECK_EQ
(
ctx
.
device_type
,
nodes
->
ctx
.
device_type
);
}
}
...
@@ -383,7 +383,7 @@ ToBlockGPU(
...
@@ -383,7 +383,7 @@ ToBlockGPU(
// functions are the same.
// functions are the same.
// Using template<> fails to export the symbols.
// Using template<> fails to export the symbols.
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
// ToBlock<kDL
GPU
, int32_t>
// ToBlock<kDL
ROCM
, int32_t>
ToBlockGPU32
(
ToBlockGPU32
(
HeteroGraphPtr
graph
,
HeteroGraphPtr
graph
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
...
@@ -393,7 +393,7 @@ ToBlockGPU32(
...
@@ -393,7 +393,7 @@ ToBlockGPU32(
}
}
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
// ToBlock<kDL
GPU
, int64_t>
// ToBlock<kDL
ROCM
, int64_t>
ToBlockGPU64
(
ToBlockGPU64
(
HeteroGraphPtr
graph
,
HeteroGraphPtr
graph
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
...
...
src/graph/transform/cuda/knn.cu
View file @
aaaecbc9
...
@@ -923,36 +923,36 @@ void NNDescent(const NDArray& points, const IdArray& offsets,
...
@@ -923,36 +923,36 @@ void NNDescent(const NDArray& points, const IdArray& offsets,
device
->
FreeWorkspace
(
ctx
,
sum_temp_storage
);
device
->
FreeWorkspace
(
ctx
,
sum_temp_storage
);
}
}
template
void
KNN
<
kDL
GPU
,
float
,
int32_t
>(
template
void
KNN
<
kDL
ROCM
,
float
,
int32_t
>(
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
template
void
KNN
<
kDL
GPU
,
float
,
int64_t
>(
template
void
KNN
<
kDL
ROCM
,
float
,
int64_t
>(
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
template
void
KNN
<
kDL
GPU
,
double
,
int32_t
>(
template
void
KNN
<
kDL
ROCM
,
double
,
int32_t
>(
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
template
void
KNN
<
kDL
GPU
,
double
,
int64_t
>(
template
void
KNN
<
kDL
ROCM
,
double
,
int64_t
>(
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
data_points
,
const
IdArray
&
data_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
NDArray
&
query_points
,
const
IdArray
&
query_offsets
,
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
const
int
k
,
IdArray
result
,
const
std
::
string
&
algorithm
);
template
void
NNDescent
<
kDL
GPU
,
float
,
int32_t
>(
template
void
NNDescent
<
kDL
ROCM
,
float
,
int32_t
>(
const
NDArray
&
points
,
const
IdArray
&
offsets
,
const
NDArray
&
points
,
const
IdArray
&
offsets
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
const
int
num_candidates
,
const
double
delta
);
const
int
num_candidates
,
const
double
delta
);
template
void
NNDescent
<
kDL
GPU
,
float
,
int64_t
>(
template
void
NNDescent
<
kDL
ROCM
,
float
,
int64_t
>(
const
NDArray
&
points
,
const
IdArray
&
offsets
,
const
NDArray
&
points
,
const
IdArray
&
offsets
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
const
int
num_candidates
,
const
double
delta
);
const
int
num_candidates
,
const
double
delta
);
template
void
NNDescent
<
kDL
GPU
,
double
,
int32_t
>(
template
void
NNDescent
<
kDL
ROCM
,
double
,
int32_t
>(
const
NDArray
&
points
,
const
IdArray
&
offsets
,
const
NDArray
&
points
,
const
IdArray
&
offsets
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
const
int
num_candidates
,
const
double
delta
);
const
int
num_candidates
,
const
double
delta
);
template
void
NNDescent
<
kDL
GPU
,
double
,
int64_t
>(
template
void
NNDescent
<
kDL
ROCM
,
double
,
int64_t
>(
const
NDArray
&
points
,
const
IdArray
&
offsets
,
const
NDArray
&
points
,
const
IdArray
&
offsets
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
IdArray
result
,
const
int
k
,
const
int
num_iters
,
const
int
num_candidates
,
const
double
delta
);
const
int
num_candidates
,
const
double
delta
);
...
...
src/graph/transform/to_bipartite.cc
View file @
aaaecbc9
...
@@ -172,7 +172,7 @@ ToBlockGPU64(HeteroGraphPtr, const std::vector<IdArray>&, bool, std::vector<IdAr
...
@@ -172,7 +172,7 @@ ToBlockGPU64(HeteroGraphPtr, const std::vector<IdArray>&, bool, std::vector<IdAr
template
<
>
template
<
>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
ToBlock
<
kDL
GPU
,
int32_t
>
(
HeteroGraphPtr
graph
,
ToBlock
<
kDL
ROCM
,
int32_t
>
(
HeteroGraphPtr
graph
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
bool
include_rhs_in_lhs
,
bool
include_rhs_in_lhs
,
std
::
vector
<
IdArray
>*
const
lhs_nodes
)
{
std
::
vector
<
IdArray
>*
const
lhs_nodes
)
{
...
@@ -181,7 +181,7 @@ ToBlock<kDLGPU, int32_t>(HeteroGraphPtr graph,
...
@@ -181,7 +181,7 @@ ToBlock<kDLGPU, int32_t>(HeteroGraphPtr graph,
template
<
>
template
<
>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
std
::
tuple
<
HeteroGraphPtr
,
std
::
vector
<
IdArray
>>
ToBlock
<
kDL
GPU
,
int64_t
>
(
HeteroGraphPtr
graph
,
ToBlock
<
kDL
ROCM
,
int64_t
>
(
HeteroGraphPtr
graph
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
const
std
::
vector
<
IdArray
>
&
rhs_nodes
,
bool
include_rhs_in_lhs
,
bool
include_rhs_in_lhs
,
std
::
vector
<
IdArray
>*
const
lhs_nodes
)
{
std
::
vector
<
IdArray
>*
const
lhs_nodes
)
{
...
...
src/graph/unit_graph.h
View file @
aaaecbc9
...
@@ -214,7 +214,7 @@ class UnitGraph : public BaseHeteroGraph {
...
@@ -214,7 +214,7 @@ class UnitGraph : public BaseHeteroGraph {
* \note The graph will be pinned inplace. Behavior depends on the current context,
* \note The graph will be pinned inplace. Behavior depends on the current context,
* kDLCPU: will be pinned;
* kDLCPU: will be pinned;
* IsPinned: directly return;
* IsPinned: directly return;
* kDL
GPU
: invalid, will throw an error.
* kDL
ROCM
: invalid, will throw an error.
* The context check is deferred to pinning the NDArray.
* The context check is deferred to pinning the NDArray.
*/
*/
void
PinMemory_
()
override
;
void
PinMemory_
()
override
;
...
...
src/partition/cuda/partition_op.cu
View file @
aaaecbc9
...
@@ -377,12 +377,12 @@ GeneratePermutationFromRemainder(
...
@@ -377,12 +377,12 @@ GeneratePermutationFromRemainder(
template
std
::
pair
<
IdArray
,
IdArray
>
template
std
::
pair
<
IdArray
,
IdArray
>
GeneratePermutationFromRemainder
<
kDL
GPU
,
int32_t
>
(
GeneratePermutationFromRemainder
<
kDL
ROCM
,
int32_t
>
(
int64_t
array_size
,
int64_t
array_size
,
int
num_parts
,
int
num_parts
,
IdArray
in_idx
);
IdArray
in_idx
);
template
std
::
pair
<
IdArray
,
IdArray
>
template
std
::
pair
<
IdArray
,
IdArray
>
GeneratePermutationFromRemainder
<
kDL
GPU
,
int64_t
>
(
GeneratePermutationFromRemainder
<
kDL
ROCM
,
int64_t
>
(
int64_t
array_size
,
int64_t
array_size
,
int
num_parts
,
int
num_parts
,
IdArray
in_idx
);
IdArray
in_idx
);
...
@@ -421,11 +421,11 @@ IdArray MapToLocalFromRemainder(
...
@@ -421,11 +421,11 @@ IdArray MapToLocalFromRemainder(
}
}
template
IdArray
template
IdArray
MapToLocalFromRemainder
<
kDL
GPU
,
int32_t
>(
MapToLocalFromRemainder
<
kDL
ROCM
,
int32_t
>(
int
num_parts
,
int
num_parts
,
IdArray
in_idx
);
IdArray
in_idx
);
template
IdArray
template
IdArray
MapToLocalFromRemainder
<
kDL
GPU
,
int64_t
>(
MapToLocalFromRemainder
<
kDL
ROCM
,
int64_t
>(
int
num_parts
,
int
num_parts
,
IdArray
in_idx
);
IdArray
in_idx
);
...
@@ -469,12 +469,12 @@ IdArray MapToGlobalFromRemainder(
...
@@ -469,12 +469,12 @@ IdArray MapToGlobalFromRemainder(
}
}
template
IdArray
template
IdArray
MapToGlobalFromRemainder
<
kDL
GPU
,
int32_t
>(
MapToGlobalFromRemainder
<
kDL
ROCM
,
int32_t
>(
int
num_parts
,
int
num_parts
,
IdArray
in_idx
,
IdArray
in_idx
,
int
part_id
);
int
part_id
);
template
IdArray
template
IdArray
MapToGlobalFromRemainder
<
kDL
GPU
,
int64_t
>(
MapToGlobalFromRemainder
<
kDL
ROCM
,
int64_t
>(
int
num_parts
,
int
num_parts
,
IdArray
in_idx
,
IdArray
in_idx
,
int
part_id
);
int
part_id
);
...
@@ -599,25 +599,25 @@ GeneratePermutationFromRange(
...
@@ -599,25 +599,25 @@ GeneratePermutationFromRange(
template
std
::
pair
<
IdArray
,
IdArray
>
template
std
::
pair
<
IdArray
,
IdArray
>
GeneratePermutationFromRange
<
kDL
GPU
,
int32_t
,
int32_t
>
(
GeneratePermutationFromRange
<
kDL
ROCM
,
int32_t
,
int32_t
>
(
int64_t
array_size
,
int64_t
array_size
,
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
);
IdArray
in_idx
);
template
std
::
pair
<
IdArray
,
IdArray
>
template
std
::
pair
<
IdArray
,
IdArray
>
GeneratePermutationFromRange
<
kDL
GPU
,
int64_t
,
int32_t
>
(
GeneratePermutationFromRange
<
kDL
ROCM
,
int64_t
,
int32_t
>
(
int64_t
array_size
,
int64_t
array_size
,
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
);
IdArray
in_idx
);
template
std
::
pair
<
IdArray
,
IdArray
>
template
std
::
pair
<
IdArray
,
IdArray
>
GeneratePermutationFromRange
<
kDL
GPU
,
int32_t
,
int64_t
>
(
GeneratePermutationFromRange
<
kDL
ROCM
,
int32_t
,
int64_t
>
(
int64_t
array_size
,
int64_t
array_size
,
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
);
IdArray
in_idx
);
template
std
::
pair
<
IdArray
,
IdArray
>
template
std
::
pair
<
IdArray
,
IdArray
>
GeneratePermutationFromRange
<
kDL
GPU
,
int64_t
,
int64_t
>
(
GeneratePermutationFromRange
<
kDL
ROCM
,
int64_t
,
int64_t
>
(
int64_t
array_size
,
int64_t
array_size
,
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
...
@@ -658,22 +658,22 @@ IdArray MapToLocalFromRange(
...
@@ -658,22 +658,22 @@ IdArray MapToLocalFromRange(
}
}
template
IdArray
template
IdArray
MapToLocalFromRange
<
kDL
GPU
,
int32_t
,
int32_t
>(
MapToLocalFromRange
<
kDL
ROCM
,
int32_t
,
int32_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
);
IdArray
in_idx
);
template
IdArray
template
IdArray
MapToLocalFromRange
<
kDL
GPU
,
int64_t
,
int32_t
>(
MapToLocalFromRange
<
kDL
ROCM
,
int64_t
,
int32_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
);
IdArray
in_idx
);
template
IdArray
template
IdArray
MapToLocalFromRange
<
kDL
GPU
,
int32_t
,
int64_t
>(
MapToLocalFromRange
<
kDL
ROCM
,
int32_t
,
int64_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
);
IdArray
in_idx
);
template
IdArray
template
IdArray
MapToLocalFromRange
<
kDL
GPU
,
int64_t
,
int64_t
>(
MapToLocalFromRange
<
kDL
ROCM
,
int64_t
,
int64_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
);
IdArray
in_idx
);
...
@@ -721,25 +721,25 @@ IdArray MapToGlobalFromRange(
...
@@ -721,25 +721,25 @@ IdArray MapToGlobalFromRange(
}
}
template
IdArray
template
IdArray
MapToGlobalFromRange
<
kDL
GPU
,
int32_t
,
int32_t
>(
MapToGlobalFromRange
<
kDL
ROCM
,
int32_t
,
int32_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
,
IdArray
in_idx
,
int
part_id
);
int
part_id
);
template
IdArray
template
IdArray
MapToGlobalFromRange
<
kDL
GPU
,
int64_t
,
int32_t
>(
MapToGlobalFromRange
<
kDL
ROCM
,
int64_t
,
int32_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
,
IdArray
in_idx
,
int
part_id
);
int
part_id
);
template
IdArray
template
IdArray
MapToGlobalFromRange
<
kDL
GPU
,
int32_t
,
int64_t
>(
MapToGlobalFromRange
<
kDL
ROCM
,
int32_t
,
int64_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
,
IdArray
in_idx
,
int
part_id
);
int
part_id
);
template
IdArray
template
IdArray
MapToGlobalFromRange
<
kDL
GPU
,
int64_t
,
int64_t
>(
MapToGlobalFromRange
<
kDL
ROCM
,
int64_t
,
int64_t
>(
int
num_parts
,
int
num_parts
,
IdArray
range
,
IdArray
range
,
IdArray
in_idx
,
IdArray
in_idx
,
...
...
src/partition/ndarray_partition.cc
View file @
aaaecbc9
...
@@ -46,9 +46,9 @@ class RemainderPartition : public NDArrayPartition {
...
@@ -46,9 +46,9 @@ class RemainderPartition : public NDArrayPartition {
IdArray
in_idx
)
const
override
{
IdArray
in_idx
)
const
override
{
#ifdef DGL_USE_CUDA
#ifdef DGL_USE_CUDA
auto
ctx
=
in_idx
->
ctx
;
auto
ctx
=
in_idx
->
ctx
;
if
(
ctx
.
device_type
==
kDL
GPU
)
{
if
(
ctx
.
device_type
==
kDL
ROCM
)
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
return
impl
::
GeneratePermutationFromRemainder
<
kDL
GPU
,
IdType
>
(
return
impl
::
GeneratePermutationFromRemainder
<
kDL
ROCM
,
IdType
>
(
ArraySize
(),
NumParts
(),
in_idx
);
ArraySize
(),
NumParts
(),
in_idx
);
});
});
}
}
...
@@ -64,9 +64,9 @@ class RemainderPartition : public NDArrayPartition {
...
@@ -64,9 +64,9 @@ class RemainderPartition : public NDArrayPartition {
IdArray
in_idx
)
const
override
{
IdArray
in_idx
)
const
override
{
#ifdef DGL_USE_CUDA
#ifdef DGL_USE_CUDA
auto
ctx
=
in_idx
->
ctx
;
auto
ctx
=
in_idx
->
ctx
;
if
(
ctx
.
device_type
==
kDL
GPU
)
{
if
(
ctx
.
device_type
==
kDL
ROCM
)
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
return
impl
::
MapToLocalFromRemainder
<
kDL
GPU
,
IdType
>
(
return
impl
::
MapToLocalFromRemainder
<
kDL
ROCM
,
IdType
>
(
NumParts
(),
in_idx
);
NumParts
(),
in_idx
);
});
});
}
}
...
@@ -83,9 +83,9 @@ class RemainderPartition : public NDArrayPartition {
...
@@ -83,9 +83,9 @@ class RemainderPartition : public NDArrayPartition {
const
int
part_id
)
const
override
{
const
int
part_id
)
const
override
{
#ifdef DGL_USE_CUDA
#ifdef DGL_USE_CUDA
auto
ctx
=
in_idx
->
ctx
;
auto
ctx
=
in_idx
->
ctx
;
if
(
ctx
.
device_type
==
kDL
GPU
)
{
if
(
ctx
.
device_type
==
kDL
ROCM
)
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
return
impl
::
MapToGlobalFromRemainder
<
kDL
GPU
,
IdType
>
(
return
impl
::
MapToGlobalFromRemainder
<
kDL
ROCM
,
IdType
>
(
NumParts
(),
in_idx
,
part_id
);
NumParts
(),
in_idx
,
part_id
);
});
});
}
}
...
@@ -118,7 +118,7 @@ class RangePartition : public NDArrayPartition {
...
@@ -118,7 +118,7 @@ class RangePartition : public NDArrayPartition {
// have only one CPU context, and can safely copy the array to that.
// have only one CPU context, and can safely copy the array to that.
range_cpu_
(
range
.
CopyTo
(
DGLContext
{
kDLCPU
,
0
}))
{
range_cpu_
(
range
.
CopyTo
(
DGLContext
{
kDLCPU
,
0
}))
{
auto
ctx
=
range
->
ctx
;
auto
ctx
=
range
->
ctx
;
if
(
ctx
.
device_type
!=
kDL
GPU
)
{
if
(
ctx
.
device_type
!=
kDL
ROCM
)
{
LOG
(
FATAL
)
<<
"The range for an NDArrayPartition is only supported "
LOG
(
FATAL
)
<<
"The range for an NDArrayPartition is only supported "
" on GPUs. Transfer the range to the target device before "
" on GPUs. Transfer the range to the target device before "
"creating the partition."
;
"creating the partition."
;
...
@@ -130,7 +130,7 @@ class RangePartition : public NDArrayPartition {
...
@@ -130,7 +130,7 @@ class RangePartition : public NDArrayPartition {
IdArray
in_idx
)
const
override
{
IdArray
in_idx
)
const
override
{
#ifdef DGL_USE_CUDA
#ifdef DGL_USE_CUDA
auto
ctx
=
in_idx
->
ctx
;
auto
ctx
=
in_idx
->
ctx
;
if
(
ctx
.
device_type
==
kDL
GPU
)
{
if
(
ctx
.
device_type
==
kDL
ROCM
)
{
if
(
ctx
.
device_type
!=
range_
->
ctx
.
device_type
||
if
(
ctx
.
device_type
!=
range_
->
ctx
.
device_type
||
ctx
.
device_id
!=
range_
->
ctx
.
device_id
)
{
ctx
.
device_id
!=
range_
->
ctx
.
device_id
)
{
LOG
(
FATAL
)
<<
"The range for the NDArrayPartition and the input "
LOG
(
FATAL
)
<<
"The range for the NDArrayPartition and the input "
...
@@ -138,7 +138,7 @@ class RangePartition : public NDArrayPartition {
...
@@ -138,7 +138,7 @@ class RangePartition : public NDArrayPartition {
}
}
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
range_
->
dtype
,
RangeType
,
{
ATEN_ID_TYPE_SWITCH
(
range_
->
dtype
,
RangeType
,
{
return
impl
::
GeneratePermutationFromRange
<
kDL
GPU
,
IdType
,
RangeType
>
(
return
impl
::
GeneratePermutationFromRange
<
kDL
ROCM
,
IdType
,
RangeType
>
(
ArraySize
(),
NumParts
(),
range_
,
in_idx
);
ArraySize
(),
NumParts
(),
range_
,
in_idx
);
});
});
});
});
...
@@ -155,10 +155,10 @@ class RangePartition : public NDArrayPartition {
...
@@ -155,10 +155,10 @@ class RangePartition : public NDArrayPartition {
IdArray
in_idx
)
const
override
{
IdArray
in_idx
)
const
override
{
#ifdef DGL_USE_CUDA
#ifdef DGL_USE_CUDA
auto
ctx
=
in_idx
->
ctx
;
auto
ctx
=
in_idx
->
ctx
;
if
(
ctx
.
device_type
==
kDL
GPU
)
{
if
(
ctx
.
device_type
==
kDL
ROCM
)
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
range_
->
dtype
,
RangeType
,
{
ATEN_ID_TYPE_SWITCH
(
range_
->
dtype
,
RangeType
,
{
return
impl
::
MapToLocalFromRange
<
kDL
GPU
,
IdType
,
RangeType
>
(
return
impl
::
MapToLocalFromRange
<
kDL
ROCM
,
IdType
,
RangeType
>
(
NumParts
(),
range_
,
in_idx
);
NumParts
(),
range_
,
in_idx
);
});
});
});
});
...
@@ -176,10 +176,10 @@ class RangePartition : public NDArrayPartition {
...
@@ -176,10 +176,10 @@ class RangePartition : public NDArrayPartition {
const
int
part_id
)
const
override
{
const
int
part_id
)
const
override
{
#ifdef DGL_USE_CUDA
#ifdef DGL_USE_CUDA
auto
ctx
=
in_idx
->
ctx
;
auto
ctx
=
in_idx
->
ctx
;
if
(
ctx
.
device_type
==
kDL
GPU
)
{
if
(
ctx
.
device_type
==
kDL
ROCM
)
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
in_idx
->
dtype
,
IdType
,
{
ATEN_ID_TYPE_SWITCH
(
range_
->
dtype
,
RangeType
,
{
ATEN_ID_TYPE_SWITCH
(
range_
->
dtype
,
RangeType
,
{
return
impl
::
MapToGlobalFromRange
<
kDL
GPU
,
IdType
,
RangeType
>
(
return
impl
::
MapToGlobalFromRange
<
kDL
ROCM
,
IdType
,
RangeType
>
(
NumParts
(),
range_
,
in_idx
,
part_id
);
NumParts
(),
range_
,
in_idx
,
part_id
);
});
});
});
});
...
...
src/random/random.cc
View file @
aaaecbc9
...
@@ -29,7 +29,7 @@ DGL_REGISTER_GLOBAL("rng._CAPI_SetSeed")
...
@@ -29,7 +29,7 @@ DGL_REGISTER_GLOBAL("rng._CAPI_SetSeed")
}
}
});
});
#ifdef DGL_USE_CUDA
#ifdef DGL_USE_CUDA
if
(
DeviceAPI
::
Get
(
kDL
GPU
)
->
IsAvailable
())
{
if
(
DeviceAPI
::
Get
(
kDL
ROCM
)
->
IsAvailable
())
{
auto
*
thr_entry
=
CUDAThreadEntry
::
ThreadLocal
();
auto
*
thr_entry
=
CUDAThreadEntry
::
ThreadLocal
();
if
(
!
thr_entry
->
curand_gen
)
{
if
(
!
thr_entry
->
curand_gen
)
{
CURAND_CALL
(
hiprandCreateGenerator
(
&
thr_entry
->
curand_gen
,
HIPRAND_RNG_PSEUDO_DEFAULT
));
CURAND_CALL
(
hiprandCreateGenerator
(
&
thr_entry
->
curand_gen
,
HIPRAND_RNG_PSEUDO_DEFAULT
));
...
...
src/runtime/c_runtime_api.cc
View file @
aaaecbc9
...
@@ -27,7 +27,7 @@ namespace runtime {
...
@@ -27,7 +27,7 @@ namespace runtime {
inline
std
::
string
DeviceName
(
int
type
)
{
inline
std
::
string
DeviceName
(
int
type
)
{
switch
(
type
)
{
switch
(
type
)
{
case
kDLCPU
:
return
"cpu"
;
case
kDLCPU
:
return
"cpu"
;
case
kDL
GPU
:
return
"gpu"
;
case
kDL
ROCM
:
return
"gpu"
;
case
kDLOpenCL
:
return
"opencl"
;
case
kDLOpenCL
:
return
"opencl"
;
case
kDLSDAccel
:
return
"sdaccel"
;
case
kDLSDAccel
:
return
"sdaccel"
;
case
kDLAOCL
:
return
"aocl"
;
case
kDLAOCL
:
return
"aocl"
;
...
...
src/runtime/cuda/cuda_device_api.cc
View file @
aaaecbc9
...
@@ -141,7 +141,7 @@ class CUDADeviceAPI final : public DeviceAPI {
...
@@ -141,7 +141,7 @@ class CUDADeviceAPI final : public DeviceAPI {
hipStream_t
cu_stream
=
static_cast
<
hipStream_t
>
(
stream
);
hipStream_t
cu_stream
=
static_cast
<
hipStream_t
>
(
stream
);
from
=
static_cast
<
const
char
*>
(
from
)
+
from_offset
;
from
=
static_cast
<
const
char
*>
(
from
)
+
from_offset
;
to
=
static_cast
<
char
*>
(
to
)
+
to_offset
;
to
=
static_cast
<
char
*>
(
to
)
+
to_offset
;
if
(
ctx_from
.
device_type
==
kDL
GPU
&&
ctx_to
.
device_type
==
kDL
GPU
)
{
if
(
ctx_from
.
device_type
==
kDL
ROCM
&&
ctx_to
.
device_type
==
kDL
ROCM
)
{
CUDA_CALL
(
hipSetDevice
(
ctx_from
.
device_id
));
CUDA_CALL
(
hipSetDevice
(
ctx_from
.
device_id
));
if
(
ctx_from
.
device_id
==
ctx_to
.
device_id
)
{
if
(
ctx_from
.
device_id
==
ctx_to
.
device_id
)
{
GPUCopy
(
from
,
to
,
size
,
hipMemcpyDeviceToDevice
,
cu_stream
);
GPUCopy
(
from
,
to
,
size
,
hipMemcpyDeviceToDevice
,
cu_stream
);
...
@@ -150,10 +150,10 @@ class CUDADeviceAPI final : public DeviceAPI {
...
@@ -150,10 +150,10 @@ class CUDADeviceAPI final : public DeviceAPI {
from
,
ctx_from
.
device_id
,
from
,
ctx_from
.
device_id
,
size
,
cu_stream
));
size
,
cu_stream
));
}
}
}
else
if
(
ctx_from
.
device_type
==
kDL
GPU
&&
ctx_to
.
device_type
==
kDLCPU
)
{
}
else
if
(
ctx_from
.
device_type
==
kDL
ROCM
&&
ctx_to
.
device_type
==
kDLCPU
)
{
CUDA_CALL
(
hipSetDevice
(
ctx_from
.
device_id
));
CUDA_CALL
(
hipSetDevice
(
ctx_from
.
device_id
));
GPUCopy
(
from
,
to
,
size
,
hipMemcpyDeviceToHost
,
cu_stream
);
GPUCopy
(
from
,
to
,
size
,
hipMemcpyDeviceToHost
,
cu_stream
);
}
else
if
(
ctx_from
.
device_type
==
kDLCPU
&&
ctx_to
.
device_type
==
kDL
GPU
)
{
}
else
if
(
ctx_from
.
device_type
==
kDLCPU
&&
ctx_to
.
device_type
==
kDL
ROCM
)
{
CUDA_CALL
(
hipSetDevice
(
ctx_to
.
device_id
));
CUDA_CALL
(
hipSetDevice
(
ctx_to
.
device_id
));
GPUCopy
(
from
,
to
,
size
,
hipMemcpyHostToDevice
,
cu_stream
);
GPUCopy
(
from
,
to
,
size
,
hipMemcpyHostToDevice
,
cu_stream
);
}
else
{
}
else
{
...
@@ -314,7 +314,7 @@ class CUDADeviceAPI final : public DeviceAPI {
...
@@ -314,7 +314,7 @@ class CUDADeviceAPI final : public DeviceAPI {
typedef
dmlc
::
ThreadLocalStore
<
CUDAThreadEntry
>
CUDAThreadStore
;
typedef
dmlc
::
ThreadLocalStore
<
CUDAThreadEntry
>
CUDAThreadStore
;
CUDAThreadEntry
::
CUDAThreadEntry
()
CUDAThreadEntry
::
CUDAThreadEntry
()
:
pool
(
kDL
GPU
,
CUDADeviceAPI
::
Global
())
{
:
pool
(
kDL
ROCM
,
CUDADeviceAPI
::
Global
())
{
}
}
CUDAThreadEntry
*
CUDAThreadEntry
::
ThreadLocal
()
{
CUDAThreadEntry
*
CUDAThreadEntry
::
ThreadLocal
()
{
...
...
src/runtime/cuda/nccl_api.cu
View file @
aaaecbc9
...
@@ -618,7 +618,7 @@ void NCCLCommunicator::AllToAllV(
...
@@ -618,7 +618,7 @@ void NCCLCommunicator::AllToAllV(
int
dev_id
;
int
dev_id
;
CUDA_CALL
(
hipGetDevice
(
&
dev_id
));
CUDA_CALL
(
hipGetDevice
(
&
dev_id
));
DGLContext
ctx
{
kDL
GPU
,
dev_id
};
DGLContext
ctx
{
kDL
ROCM
,
dev_id
};
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
auto
dtype
=
DLDataTypeTraits
<
DType
>::
dtype
;
auto
dtype
=
DLDataTypeTraits
<
DType
>::
dtype
;
...
@@ -680,7 +680,7 @@ void NCCLCommunicator::AllToAll(
...
@@ -680,7 +680,7 @@ void NCCLCommunicator::AllToAll(
#else
#else
int
dev_id
;
int
dev_id
;
CUDA_CALL
(
hipGetDevice
(
&
dev_id
));
CUDA_CALL
(
hipGetDevice
(
&
dev_id
));
DGLContext
ctx
{
kDL
GPU
,
dev_id
};
DGLContext
ctx
{
kDL
ROCM
,
dev_id
};
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
auto
device
=
runtime
::
DeviceAPI
::
Get
(
ctx
);
auto
dtype
=
DLDataTypeTraits
<
IdType
>::
dtype
;
auto
dtype
=
DLDataTypeTraits
<
IdType
>::
dtype
;
...
...
src/runtime/ndarray.cc
View file @
aaaecbc9
...
@@ -262,7 +262,7 @@ void NDArray::PinContainer(NDArray::Container* ptr) {
...
@@ -262,7 +262,7 @@ void NDArray::PinContainer(NDArray::Container* ptr) {
auto
*
tensor
=
&
(
ptr
->
dl_tensor
);
auto
*
tensor
=
&
(
ptr
->
dl_tensor
);
CHECK_EQ
(
tensor
->
ctx
.
device_type
,
kDLCPU
)
CHECK_EQ
(
tensor
->
ctx
.
device_type
,
kDLCPU
)
<<
"Only NDArray on CPU can be pinned"
;
<<
"Only NDArray on CPU can be pinned"
;
DeviceAPI
::
Get
(
kDL
GPU
)
->
PinData
(
tensor
->
data
,
GetDataSize
(
*
tensor
));
DeviceAPI
::
Get
(
kDL
ROCM
)
->
PinData
(
tensor
->
data
,
GetDataSize
(
*
tensor
));
ptr
->
pinned_by_dgl_
=
true
;
ptr
->
pinned_by_dgl_
=
true
;
}
}
...
@@ -275,14 +275,14 @@ void NDArray::UnpinContainer(NDArray::Container* ptr) {
...
@@ -275,14 +275,14 @@ void NDArray::UnpinContainer(NDArray::Container* ptr) {
// 1. not pinned, do nothing
// 1. not pinned, do nothing
if
(
!
container_is_pinned
)
return
;
if
(
!
container_is_pinned
)
return
;
// 2. pinned by DGL, unpin it
// 2. pinned by DGL, unpin it
DeviceAPI
::
Get
(
kDL
GPU
)
->
UnpinData
(
ptr
->
dl_tensor
.
data
);
DeviceAPI
::
Get
(
kDL
ROCM
)
->
UnpinData
(
ptr
->
dl_tensor
.
data
);
ptr
->
pinned_by_dgl_
=
false
;
ptr
->
pinned_by_dgl_
=
false
;
}
}
void
NDArray
::
RecordStream
(
DGLArray
*
tensor
,
DGLStreamHandle
stream
)
{
void
NDArray
::
RecordStream
(
DGLArray
*
tensor
,
DGLStreamHandle
stream
)
{
TensorDispatcher
*
td
=
TensorDispatcher
::
Global
();
TensorDispatcher
*
td
=
TensorDispatcher
::
Global
();
CHECK
(
td
->
IsAvailable
())
<<
"RecordStream only works when TensorAdaptor is available."
;
CHECK
(
td
->
IsAvailable
())
<<
"RecordStream only works when TensorAdaptor is available."
;
CHECK_EQ
(
tensor
->
ctx
.
device_type
,
kDL
GPU
)
CHECK_EQ
(
tensor
->
ctx
.
device_type
,
kDL
ROCM
)
<<
"RecordStream only works with GPU tensors."
;
<<
"RecordStream only works with GPU tensors."
;
td
->
RecordStream
(
tensor
->
data
,
stream
,
tensor
->
ctx
.
device_id
);
td
->
RecordStream
(
tensor
->
data
,
stream
,
tensor
->
ctx
.
device_id
);
...
@@ -353,7 +353,7 @@ bool NDArray::IsContainerPinned(NDArray::Container* ptr) {
...
@@ -353,7 +353,7 @@ bool NDArray::IsContainerPinned(NDArray::Container* ptr) {
if
(
tensor
->
ctx
.
device_type
!=
kDLCPU
)
if
(
tensor
->
ctx
.
device_type
!=
kDLCPU
)
return
false
;
return
false
;
// ... and CUDA device API is enabled, and the tensor is indeed in pinned memory.
// ... and CUDA device API is enabled, and the tensor is indeed in pinned memory.
auto
device
=
DeviceAPI
::
Get
(
kDL
GPU
,
true
);
auto
device
=
DeviceAPI
::
Get
(
kDL
ROCM
,
true
);
return
device
&&
device
->
IsPinned
(
tensor
->
data
);
return
device
&&
device
->
IsPinned
(
tensor
->
data
);
}
}
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment