Commit 0235a31a authored by lisj
Browse files

适配dtk23.04-km

parent fda4567b
...@@ -223,7 +223,7 @@ if(USE_HIP) ...@@ -223,7 +223,7 @@ if(USE_HIP)
dgl_config_hip(DGL_CUDA_SRC) dgl_config_hip(DGL_CUDA_SRC)
list(APPEND DGL_SRC ${DGL_CUDA_SRC}) list(APPEND DGL_SRC ${DGL_CUDA_SRC})
set(HIP_HIPCC_FLAGS "-std=c++17") set(HIP_HIPCC_FLAGS "-std=c++17")
hip_add_library(dgl SHARED ${DGL_SRC}) hip_add_library(dgl SHARED ${DGL_SRC} HIPCC_OPTIONS "-DDGL_USE_CUDA" "-DCUDART_VERSION_LT_11000=true")
target_include_directories(dgl PRIVATE "${ROCM_PATH}/include") target_include_directories(dgl PRIVATE "${ROCM_PATH}/include")
target_include_directories(dgl PRIVATE "${ROCM_PATH}/include/hiprand") target_include_directories(dgl PRIVATE "${ROCM_PATH}/include/hiprand")
target_include_directories(dgl PRIVATE "${ROCM_PATH}/include/rocrand") target_include_directories(dgl PRIVATE "${ROCM_PATH}/include/rocrand")
......
...@@ -16,7 +16,7 @@ pip install dgl* (下载的dgl的whl包) ...@@ -16,7 +16,7 @@ pip install dgl* (下载的dgl的whl包)
### 使用源码安装 ### 使用源码安装
#### 编译环境准备 #### 编译环境准备
```shell ```shell
pip install setuptools=59.5.0 wheel pip install setuptools==59.5.0 wheel
``` ```
#### 编译安装 #### 编译安装
......
...@@ -10,70 +10,10 @@ macro(dgl_config_hip out_variable) ...@@ -10,70 +10,10 @@ macro(dgl_config_hip out_variable)
# avoid global retrigger of cmake # avoid global retrigger of cmake
include_directories(${CUDA_INCLUDE_DIRS}) include_directories(${CUDA_INCLUDE_DIRS})
add_definitions(-DDGL_USE_CUDA)
# src/array/cuda/spmm_hetero.cu
add_definitions(-DCUDART_VERSION_LT_11000=true)
# set(DGL_DEBUG_SRC
# src/array/cuda/csr_transpose.cc
# src/array/cuda/array_cumsum.cu
# src/array/cuda/array_index_select.cu
# src/array/cuda/array_nonzero.cu
# src/array/cuda/array_op_impl.cu
# src/array/cuda/array_scatter.cu
# src/array/cuda/array_sort.cu
# src/array/cuda/coo2csr.cu
# src/array/cuda/coo_sort.cu
# src/array/cuda/csr2coo.cu
# src/array/cuda/csr_get_data.cu
# src/array/cuda/csr_mm.cu
# src/array/cuda/csr_sort.cu
# src/array/cuda/csr_sum.cu
# src/array/cuda/cuda_filter.cu
# src/array/cuda/disjoint_union.cu
# src/array/cuda/gather_mm.cu
# src/array/cuda/negative_sampling.cu
# src/array/cuda/rowwise_sampling.cu
# src/array/cuda/rowwise_sampling_prob.cu
# src/array/cuda/sddmm.cu
# src/array/cuda/sddmm_hetero_coo.cu
# src/array/cuda/sddmm_hetero_csr.cu
# src/array/cuda/segment_reduce.cu
# src/array/cuda/spmat_op_impl_coo.cu
# src/array/cuda/spmat_op_impl_csr.cu
# src/array/cuda/spmm.cu
# src/array/cuda/spmm_hetero.cu
# src/array/cuda/utils.cu
# src/array/cuda/uvm/array_index_select_uvm.cu
# src/partition/cuda/partition_op.cu
# src/runtime/cuda/cuda_device_api.cc
# src/runtime/cuda/cuda_hashtable.cu
# src/runtime/cuda/nccl_api.cu
# src/geometry/cuda/geometry_op_impl.cu
# src/graph/transform/cuda/cuda_compact_graph.cu
# src/graph/transform/cuda/cuda_to_block.cu
# src/graph/transform/cuda/knn.cu
# src/graph/sampling/randomwalks/frequency_hashmap.cu
# src/graph/sampling/randomwalks/get_node_types_gpu.cu
# src/graph/sampling/randomwalks/randomwalk_gpu.cu
# )
set_source_files_properties(src/random/random.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) set_source_files_properties(src/random/random.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
set_source_files_properties(src/array/cuda/csr_transpose.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) set_source_files_properties(src/array/cuda/csr_transpose.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
set_source_files_properties(src/runtime/cuda/cuda_device_api.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) set_source_files_properties(src/runtime/cuda/cuda_device_api.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# set(${out_variable} ${DGL_DEBUG_SRC})
file(GLOB_RECURSE DGL_HIP_SRC file(GLOB_RECURSE DGL_HIP_SRC
src/array/cuda/*.cc src/array/cuda/*.cc
src/array/cuda/*.cu src/array/cuda/*.cu
......
...@@ -100,7 +100,7 @@ __global__ void GatherMMScatterKernel( ...@@ -100,7 +100,7 @@ __global__ void GatherMMScatterKernel(
// Load A in shared mem in a coalesced way // Load A in shared mem in a coalesced way
for (unsigned int l = laneId; l < a_tile; l += 32) for (unsigned int l = laneId; l < a_tile; l += 32)
sh_A[local_row * sh_a_tile + l] = A[cur_rowA * in_len + (k_start + l)]; sh_A[local_row * sh_a_tile + l] = A[cur_rowA * in_len + (k_start + l)];
__syncwarp(); __threadfence_block();
for (unsigned int outloop = 0; outloop < out_len; outloop +=32) { for (unsigned int outloop = 0; outloop < out_len; outloop +=32) {
DType out_reg = 0; // thread private DType out_reg = 0; // thread private
...@@ -161,7 +161,7 @@ __global__ void GatherMMScatterKernel2( ...@@ -161,7 +161,7 @@ __global__ void GatherMMScatterKernel2(
/* Load A in shared mem in a coalesced way */ /* Load A in shared mem in a coalesced way */
for (unsigned int l = laneId; l < a_tile; l += 32) for (unsigned int l = laneId; l < a_tile; l += 32)
sh_A[local_row * sh_a_tile + l] = A[row_a * in_len + (k_start + l)]; sh_A[local_row * sh_a_tile + l] = A[row_a * in_len + (k_start + l)];
__syncwarp(); __threadfence_block();
for (unsigned int outloop = 0; outloop < out_len; outloop +=32) { for (unsigned int outloop = 0; outloop < out_len; outloop +=32) {
DType out_reg = 0; // thread private DType out_reg = 0; // thread private
......
...@@ -176,7 +176,7 @@ __global__ void SDDMMCooTreeReduceKernel( ...@@ -176,7 +176,7 @@ __global__ void SDDMMCooTreeReduceKernel(
} }
#pragma unroll #pragma unroll
for (int offset = 16; offset > 0; offset /= 2) for (int offset = 16; offset > 0; offset /= 2)
val += __shfl_down_sync(full_mask, val, offset); val += __shfl_down(val, offset);
if (tx == 0) if (tx == 0)
outoff[i] = val; outoff[i] = val;
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment