Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
0235a31a
Commit
0235a31a
authored
Sep 05, 2023
by
lisj
Browse files
适配dtk23.04-km
parent
fda4567b
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
5 additions
and
65 deletions
+5
-65
CMakeLists.txt
CMakeLists.txt
+1
-1
README.md
README.md
+1
-1
cmake/modules/ROCM.cmake
cmake/modules/ROCM.cmake
+0
-60
src/array/cuda/gather_mm.cu
src/array/cuda/gather_mm.cu
+2
-2
src/array/cuda/sddmm.cuh
src/array/cuda/sddmm.cuh
+1
-1
No files found.
CMakeLists.txt
View file @
0235a31a
...
@@ -223,7 +223,7 @@ if(USE_HIP)
...
@@ -223,7 +223,7 @@ if(USE_HIP)
dgl_config_hip
(
DGL_CUDA_SRC
)
dgl_config_hip
(
DGL_CUDA_SRC
)
list
(
APPEND DGL_SRC
${
DGL_CUDA_SRC
}
)
list
(
APPEND DGL_SRC
${
DGL_CUDA_SRC
}
)
set
(
HIP_HIPCC_FLAGS
"-std=c++17"
)
set
(
HIP_HIPCC_FLAGS
"-std=c++17"
)
hip_add_library
(
dgl SHARED
${
DGL_SRC
}
)
hip_add_library
(
dgl SHARED
${
DGL_SRC
}
HIPCC_OPTIONS
"-DDGL_USE_CUDA"
"-DCUDART_VERSION_LT_11000=true"
)
target_include_directories
(
dgl PRIVATE
"
${
ROCM_PATH
}
/include"
)
target_include_directories
(
dgl PRIVATE
"
${
ROCM_PATH
}
/include"
)
target_include_directories
(
dgl PRIVATE
"
${
ROCM_PATH
}
/include/hiprand"
)
target_include_directories
(
dgl PRIVATE
"
${
ROCM_PATH
}
/include/hiprand"
)
target_include_directories
(
dgl PRIVATE
"
${
ROCM_PATH
}
/include/rocrand"
)
target_include_directories
(
dgl PRIVATE
"
${
ROCM_PATH
}
/include/rocrand"
)
...
...
README.md
View file @
0235a31a
...
@@ -16,7 +16,7 @@ pip install dgl* (下载的dgl的whl包)
...
@@ -16,7 +16,7 @@ pip install dgl* (下载的dgl的whl包)
### 使用源码安装
### 使用源码安装
#### 编译环境准备
#### 编译环境准备
```
shell
```
shell
pip
install
setuptools
=
59.5.0 wheel
pip
install
setuptools
=
=
59.5.0 wheel
```
```
#### 编译安装
#### 编译安装
...
...
cmake/modules/ROCM.cmake
View file @
0235a31a
...
@@ -10,70 +10,10 @@ macro(dgl_config_hip out_variable)
...
@@ -10,70 +10,10 @@ macro(dgl_config_hip out_variable)
# avoid global retrigger of cmake
# avoid global retrigger of cmake
include_directories
(
${
CUDA_INCLUDE_DIRS
}
)
include_directories
(
${
CUDA_INCLUDE_DIRS
}
)
add_definitions
(
-DDGL_USE_CUDA
)
# src/array/cuda/spmm_hetero.cu
add_definitions
(
-DCUDART_VERSION_LT_11000=true
)
# set(DGL_DEBUG_SRC
# src/array/cuda/csr_transpose.cc
# src/array/cuda/array_cumsum.cu
# src/array/cuda/array_index_select.cu
# src/array/cuda/array_nonzero.cu
# src/array/cuda/array_op_impl.cu
# src/array/cuda/array_scatter.cu
# src/array/cuda/array_sort.cu
# src/array/cuda/coo2csr.cu
# src/array/cuda/coo_sort.cu
# src/array/cuda/csr2coo.cu
# src/array/cuda/csr_get_data.cu
# src/array/cuda/csr_mm.cu
# src/array/cuda/csr_sort.cu
# src/array/cuda/csr_sum.cu
# src/array/cuda/cuda_filter.cu
# src/array/cuda/disjoint_union.cu
# src/array/cuda/gather_mm.cu
# src/array/cuda/negative_sampling.cu
# src/array/cuda/rowwise_sampling.cu
# src/array/cuda/rowwise_sampling_prob.cu
# src/array/cuda/sddmm.cu
# src/array/cuda/sddmm_hetero_coo.cu
# src/array/cuda/sddmm_hetero_csr.cu
# src/array/cuda/segment_reduce.cu
# src/array/cuda/spmat_op_impl_coo.cu
# src/array/cuda/spmat_op_impl_csr.cu
# src/array/cuda/spmm.cu
# src/array/cuda/spmm_hetero.cu
# src/array/cuda/utils.cu
# src/array/cuda/uvm/array_index_select_uvm.cu
# src/partition/cuda/partition_op.cu
# src/runtime/cuda/cuda_device_api.cc
# src/runtime/cuda/cuda_hashtable.cu
# src/runtime/cuda/nccl_api.cu
# src/geometry/cuda/geometry_op_impl.cu
# src/graph/transform/cuda/cuda_compact_graph.cu
# src/graph/transform/cuda/cuda_to_block.cu
# src/graph/transform/cuda/knn.cu
# src/graph/sampling/randomwalks/frequency_hashmap.cu
# src/graph/sampling/randomwalks/get_node_types_gpu.cu
# src/graph/sampling/randomwalks/randomwalk_gpu.cu
# )
set_source_files_properties
(
src/random/random.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1
)
set_source_files_properties
(
src/random/random.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1
)
set_source_files_properties
(
src/array/cuda/csr_transpose.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1
)
set_source_files_properties
(
src/array/cuda/csr_transpose.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1
)
set_source_files_properties
(
src/runtime/cuda/cuda_device_api.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1
)
set_source_files_properties
(
src/runtime/cuda/cuda_device_api.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1
)
# set(${out_variable} ${DGL_DEBUG_SRC})
file
(
GLOB_RECURSE DGL_HIP_SRC
file
(
GLOB_RECURSE DGL_HIP_SRC
src/array/cuda/*.cc
src/array/cuda/*.cc
src/array/cuda/*.cu
src/array/cuda/*.cu
...
...
src/array/cuda/gather_mm.cu
View file @
0235a31a
...
@@ -100,7 +100,7 @@ __global__ void GatherMMScatterKernel(
...
@@ -100,7 +100,7 @@ __global__ void GatherMMScatterKernel(
// Load A in shared mem in a coalesced way
// Load A in shared mem in a coalesced way
for
(
unsigned
int
l
=
laneId
;
l
<
a_tile
;
l
+=
32
)
for
(
unsigned
int
l
=
laneId
;
l
<
a_tile
;
l
+=
32
)
sh_A
[
local_row
*
sh_a_tile
+
l
]
=
A
[
cur_rowA
*
in_len
+
(
k_start
+
l
)];
sh_A
[
local_row
*
sh_a_tile
+
l
]
=
A
[
cur_rowA
*
in_len
+
(
k_start
+
l
)];
__
syncwarp
();
__
threadfence_block
();
for
(
unsigned
int
outloop
=
0
;
outloop
<
out_len
;
outloop
+=
32
)
{
for
(
unsigned
int
outloop
=
0
;
outloop
<
out_len
;
outloop
+=
32
)
{
DType
out_reg
=
0
;
// thread private
DType
out_reg
=
0
;
// thread private
...
@@ -161,7 +161,7 @@ __global__ void GatherMMScatterKernel2(
...
@@ -161,7 +161,7 @@ __global__ void GatherMMScatterKernel2(
/* Load A in shared mem in a coalesced way */
/* Load A in shared mem in a coalesced way */
for
(
unsigned
int
l
=
laneId
;
l
<
a_tile
;
l
+=
32
)
for
(
unsigned
int
l
=
laneId
;
l
<
a_tile
;
l
+=
32
)
sh_A
[
local_row
*
sh_a_tile
+
l
]
=
A
[
row_a
*
in_len
+
(
k_start
+
l
)];
sh_A
[
local_row
*
sh_a_tile
+
l
]
=
A
[
row_a
*
in_len
+
(
k_start
+
l
)];
__
syncwarp
();
__
threadfence_block
();
for
(
unsigned
int
outloop
=
0
;
outloop
<
out_len
;
outloop
+=
32
)
{
for
(
unsigned
int
outloop
=
0
;
outloop
<
out_len
;
outloop
+=
32
)
{
DType
out_reg
=
0
;
// thread private
DType
out_reg
=
0
;
// thread private
...
...
src/array/cuda/sddmm.cuh
View file @
0235a31a
...
@@ -176,7 +176,7 @@ __global__ void SDDMMCooTreeReduceKernel(
...
@@ -176,7 +176,7 @@ __global__ void SDDMMCooTreeReduceKernel(
}
}
#pragma unroll
#pragma unroll
for
(
int
offset
=
16
;
offset
>
0
;
offset
/=
2
)
for
(
int
offset
=
16
;
offset
>
0
;
offset
/=
2
)
val
+=
__shfl_down
_sync
(
full_mask
,
val
,
offset
);
val
+=
__shfl_down
(
val
,
offset
);
if
(
tx
==
0
)
if
(
tx
==
0
)
outoff
[
i
]
=
val
;
outoff
[
i
]
=
val
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment