Commit 833803f3 authored by sangwzh

Update DGL code to HIP

parent 1d28bf8b
@@ -7,6 +7,7 @@ message(STATUS "Start configuring project ${PROJECT_NAME}")
 set(CMAKE_CXX_STANDARD 17)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_HIP_STANDARD 17)
 # cmake utils
 include(cmake/util/Util.cmake)
@@ -64,8 +65,10 @@ dgl_feature_option(
 dgl_feature_option(
   USE_LIBXSMM
   "Build with LIBXSMM library optimization"
-  "all"
+  "none"
 )
+message(STATUS "USE_LIBXSMM: ${USE_LIBXSMM}")
 dgl_feature_option(
   USE_OPENMP
   "Build with OpenMP"
@@ -78,6 +81,8 @@ dgl_feature_option(
   "all"
 )
+message(STATUS "BUILD_GRAPHBOLT: ${BUILD_GRAPHBOLT}")
 dgl_feature_option(
   LIBCXX_ENABLE_PARALLEL_ALGORITHMS
   "Enable the parallel algorithms library. This requires the PSTL to be available."
@@ -146,6 +151,29 @@ if(USE_CUDA)
   cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/cccl/libcudacxx/include")
 endif(USE_CUDA)
+if(USE_HIP)
+  message(STATUS "Build with ROCM support")
+  project(dgl C CXX HIP)
+  include(cmake/modules/ROCM.cmake)
+  # target_compile_features(dgl PRIVATE cxx_std_17)
+  set(CMAKE_HIP_ARCHITECTURES gfx906;gfx928;gfx926)
+  # see https://github.com/NVIDIA/thrust/issues/1401
+  if(NOT DEFINED ENV{ROCM_PATH})
+    set(ROCM_PATH "/opt/dtk" CACHE PATH "Path to which RoCm has been installed")
+    set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
+  else()
+    set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to which ROCm has been installed")
+    set(HIP_PATH ${ROCM_PATH}/hip CACHE PATH "Path to which HIP has been installed")
+  endif()
+  set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
+  find_package(hip REQUIRED)
+  set(HIP_FOUND TRUE)
+  message(STATUS "HIP_FOUND :${HIP_FOUND}")
+  # add_definitions(-DTHRUST_CUB_WRAPPED_NAMESPACE=dgl)
+  include(cmake/modules/ROCM.cmake)
+  message(STATUS "Use external CUB/Thrust library for a consistent API and performance.")
+endif(USE_HIP)
 # initial variables
 if(NOT MSVC)
   set(DGL_LINKER_LIBS "dl")
@@ -247,6 +275,9 @@ file(GLOB_RECURSE DGL_SRC_1
 )
 list(APPEND DGL_SRC ${DGL_SRC_1})
+if(NOT USE_HIP AND NOT USE_CUDA)
+  add_library(dgl SHARED ${DGL_SRC})
+endif()
 if (NOT MSVC)
   file(GLOB_RECURSE DGL_RPC_SRC src/rpc/*.cc)
@@ -255,6 +286,27 @@ else()
 endif()
 list(APPEND DGL_SRC ${DGL_RPC_SRC})
+# Configure hip
+message(STATUS ">>>>>>>> USE_HIP: ${USE_HIP}")
+message(STATUS ">>>>>>>> DGL_SRC: ${DGL_SRC}")
+message(STATUS ">>>>>>>> DGL_RPC_SRC: ${DGL_RPC_SRC}")
+if(USE_HIP)
+  dgl_config_hip(DGL_CUDA_SRC)
+  list(APPEND DGL_SRC ${DGL_CUDA_SRC})
+  set(HIP_HIPCC_FLAGS "-std=c++17")
+  add_library(dgl SHARED ${DGL_SRC})
+  target_link_options(dgl PRIVATE "-Wl,--allow-multiple-definition")
+  # set_target_properties(dgl PROPERTIES LINKER_LANGUAGE hip)
+  target_link_libraries(dgl ${DGL_LINKER_LIBS})
+  target_include_directories(dgl PRIVATE "${CMAKE_SOURCE_DIR}/include/dgl")
+  target_include_directories(dgl PRIVATE "${CMAKE_SOURCE_DIR}/include")
+  target_include_directories(dgl PRIVATE "${ROCM_PATH}/include")
+  target_include_directories(dgl PRIVATE "${ROCM_PATH}/include/hiprand")
+  target_include_directories(dgl PRIVATE "${ROCM_PATH}/include/rocrand")
+  message(STATUS ">>>>>>>> DGL_LINKER_LIBS: ${DGL_LINKER_LIBS}")
+endif(USE_HIP)
 # Configure cuda
 if(USE_CUDA)
   file(GLOB_RECURSE DGL_CUDA_SRC
@@ -274,12 +326,13 @@ if(USE_CUDA)
   list(APPEND DGL_SRC ${DGL_CUDA_SRC})
   dgl_config_cuda(DGL_LINKER_LIBS)
   cuda_add_library(dgl SHARED ${DGL_SRC})
-else(USE_CUDA)
+endif()
+if(NOT USE_CUDA AND NOT USE_HIP)
   add_library(dgl SHARED ${DGL_SRC})
-endif(USE_CUDA)
+endif()
-# include directories
-target_include_directories(dgl PRIVATE "include")
+target_include_directories(dgl PUBLIC "include")
+target_include_directories(dgl PUBLIC "${CMAKE_SOURCE_DIR}/include")
 # check for conda includes
 if("$ENV{CONDA_BUILD}" STREQUAL "1")
   set(in_conda_build TRUE)
@@ -419,6 +472,20 @@ if(USE_CUDA)
   target_include_directories(dgl PRIVATE "third_party/HugeCTR/gpu_cache/include")
   list(APPEND DGL_LINKER_LIBS gpu_cache)
   message(STATUS "Build with HugeCTR GPU embedding cache.")
+elseif(USE_HIP)
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DUSE_GPU_CACHE")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_GPU_CACHE")
+  # Manually build gpu_cache because CMake always builds it as shared
+  file(GLOB gpu_cache_src
+    third_party/HugeCTR/gpu_cache/src/nv_gpu_cache.hip
+  )
+  add_library(gpu_cache STATIC ${gpu_cache_src})
+  target_compile_options(gpu_cache PRIVATE "-fPIC")
+  set_target_properties(gpu_cache PROPERTIES LINKER_LANGUAGE HIP)
+  target_include_directories(gpu_cache PRIVATE "third_party/HugeCTR/gpu_cache/include")
+  target_include_directories(dgl PRIVATE "third_party/HugeCTR/gpu_cache/include")
+  list(APPEND DGL_LINKER_LIBS gpu_cache)
+  message(STATUS "Build with HugeCTR GPU embedding cache.")
 endif(USE_CUDA)
 # support PARALLEL_ALGORITHMS
@@ -461,8 +528,8 @@ if(BUILD_TORCH)
 tensoradapter_pytorch
 ${CMAKE_COMMAND} -E env
 CMAKE_COMMAND=${CMAKE_CMD}
-CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
-USE_CUDA=${USE_CUDA}
+# CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
+USE_HIP=${USE_HIP}
 EXTERNAL_DMLC_LIB_PATH=${EXTERNAL_DMLC_LIB_PATH}
 BINDIR=${CMAKE_CURRENT_BINARY_DIR}
 bash ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS}
@@ -491,6 +558,8 @@ if(BUILD_CPP_TEST)
 add_executable(runUnitTests ${TEST_SRC_FILES})
 target_link_libraries(runUnitTests gtest gtest_main)
 target_link_libraries(runUnitTests dgl)
+target_link_options(runUnitTests PRIVATE -Wl,--allow-multiple-definition -fuse-ld=lld)
+target_compile_options(runUnitTests PRIVATE "-fPIC")
 add_test(UnitTests runUnitTests)
 endif(BUILD_CPP_TEST)
@@ -525,8 +594,7 @@ if(BUILD_SPARSE)
 ALL
 ${CMAKE_COMMAND} -E env
 CMAKE_COMMAND=${CMAKE_CMD}
-CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
-USE_CUDA=${USE_CUDA}
+USE_HIP=${USE_HIP}
 BINDIR=${CMAKE_CURRENT_BINARY_DIR}
 INCLUDEDIR="${DGL_INCLUDE_DIRS}"
 CFLAGS=${CMAKE_C_FLAGS}
@@ -541,12 +609,12 @@ endif(BUILD_SPARSE)
 if(BUILD_GRAPHBOLT)
   message(STATUS "Configuring graphbolt library")
-  string(REPLACE ";" "\\;" CUDA_ARCHITECTURES_ESCAPED "${CUDA_ARCHITECTURES}")
+  # string(REPLACE ";" "\\;" CUDA_ARCHITECTURES_ESCAPED "${CUDA_ARCHITECTURES}")
   file(TO_NATIVE_PATH ${CMAKE_CURRENT_BINARY_DIR} BINDIR)
   file(TO_NATIVE_PATH ${CMAKE_COMMAND} CMAKE_CMD)
-  if(USE_CUDA)
+  if(USE_HIP)
     get_target_property(GPU_CACHE_INCLUDE_DIRS gpu_cache INCLUDE_DIRECTORIES)
-  endif(USE_CUDA)
+  endif(USE_HIP)
   string(REPLACE ";" "\\;" GPU_CACHE_INCLUDE_DIRS_ESCAPED "${GPU_CACHE_INCLUDE_DIRS}")
   if(MSVC)
     file(TO_NATIVE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/graphbolt/build.bat BUILD_SCRIPT)
@@ -573,8 +641,7 @@ if(BUILD_GRAPHBOLT)
 ALL
 ${CMAKE_COMMAND} -E env
 CMAKE_COMMAND=${CMAKE_CMD}
-CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
-USE_CUDA=${USE_CUDA}
+USE_HIP=${USE_HIP}
 BINDIR=${CMAKE_CURRENT_BINARY_DIR}
 GPU_CACHE_INCLUDE_DIRS="${GPU_CACHE_INCLUDE_DIRS_ESCAPED}"
 CFLAGS=${CMAKE_C_FLAGS}
...
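For orientation only: a minimal sketch of how the USE_HIP switch introduced above might be driven from a fresh build directory. The ROCm/DTK prefix, the hipcc compiler choice, and the exact option set are assumptions inferred from the defaults and build scripts in this commit, not a documented interface.

```bash
# Hypothetical configure/build for the HIP code path (paths are assumptions).
export ROCM_PATH=/opt/dtk          # CMakeLists.txt falls back to /opt/dtk when unset
mkdir -p build && cd build
CC=hipcc CXX=hipcc cmake \
  -DUSE_HIP=ON \
  -DUSE_CUDA=OFF \
  -DBUILD_TORCH=ON \
  ..
make -j VERBOSE=1
```

The commit guards add_library(dgl ...) so that exactly one of the CUDA, HIP, or CPU-only branches creates the target, which is why USE_CUDA and USE_HIP are not expected to be enabled together here.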
################################################################################################
# Config hip compilation.
# Usage:
# dgl_config_hip(<dgl_cuda_src>)
macro(dgl_config_hip out_variable)
if(NOT HIP_FOUND)
message(FATAL_ERROR "Cannot find HIP.")
endif()
# always set the includedir when cuda is available
# avoid global retrigger of cmake
include_directories(${CUDA_INCLUDE_DIRS})
add_definitions(-DDGL_USE_CUDA)
add_definitions(-D__HIP_PLATFORM_AMD__)
add_definitions(-DCUDART_VERSION_LT_11000=true)
add_definitions(-DDTKRT_VERSION=11080)
add_definitions(-D__DTK_ARCH__=11080)
include_directories(BEFORE SYSTEM "${CMAKE_SOURCE_DIR}/include/")
message(STATUS ">>>>>>>>>>>> CUDA_INCLUDE_DIRS : ${CUDA_INCLUDE_DIRS}")
set_source_files_properties(src/random/random.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
set_source_files_properties(src/array/cuda/csr_transpose.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
set_source_files_properties(src/runtime/cuda/cuda_device_api.cc PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
file(GLOB_RECURSE DGL_HIP_SRC
src/array/cuda/*.cc
src/array/cuda/*.hip
src/array/cuda/uvm/*.cc
src/array/cuda/uvm/*.hip
src/kernel/cuda/*.cc
src/kernel/cuda/*.hip
src/partition/cuda/*.hip
src/runtime/cuda/*.cc
src/runtime/cuda/*.hip
src/geometry/cuda/*.hip
src/graph/transform/cuda/*.hip
src/graph/sampling/randomwalks/*.hip
)
find_library(DCU_RUNTIME galaxyhip ${ROCM_PATH}/lib)
find_library(DCU_SPARSE hipsparse ${ROCM_PATH}/lib)
find_library(DCU_BLAS hipblas ${ROCM_PATH}/lib)
find_library(DCU_RAND hiprand ${ROCM_PATH}/lib)
message(STATUS "Found DCU_RUNTIME: ${DCU_RUNTIME}")
message(STATUS "Found DCU_SPARSE: ${DCU_SPARSE}")
message(STATUS "Found DCU_BLAS: ${DCU_BLAS}")
message(STATUS "Found DCU_RAND: ${DCU_RAND}")
list(APPEND DGL_LINKER_LIBS
${DCU_RUNTIME}
${DCU_SPARSE}
${DCU_BLAS}
${DCU_RAND}
)
set(${out_variable} ${DGL_HIP_SRC})
endmacro()
@@ -22,7 +22,7 @@ CMAKE_FLAGS="$CMAKE_FLAGS -DDGL_INCLUDE_DIRS=${INCLUDEDIR// /;} -DDGL_BUILD_DIR=
 echo $CMAKE_FLAGS
 if [ $# -eq 0 ]; then
-  $CMAKE_COMMAND $CMAKE_FLAGS ..
+  CC=hipcc CXX=hipcc $CMAKE_COMMAND $CMAKE_FLAGS ..
   make -j VERBOSE=1
   cp -v $CPSOURCE $BINDIR/dgl_sparse
 else
@@ -30,7 +30,7 @@ else
   TORCH_VER=$($PYTHON_INTERP -c 'import torch; print(torch.__version__.split("+")[0])')
   mkdir -p $TORCH_VER
   cd $TORCH_VER
-  $CMAKE_COMMAND $CMAKE_FLAGS -DPYTHON_INTERP=$PYTHON_INTERP ../..
+  CC=hipcc CXX=hipcc $CMAKE_COMMAND $CMAKE_FLAGS -DPYTHON_INTERP=$PYTHON_INTERP ../..
   make -j VERBOSE=1
   cp -v $CPSOURCE $BINDIR/dgl_sparse
   cd ..
...
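Note on the build-script change above: prefixing the cmake invocation with CC=hipcc CXX=hipcc relies on CMake reading the CC/CXX environment variables, which only happens on the first configure of a build directory. A hedged, more explicit alternative (not part of this commit) would pass the compilers as cache variables:

```bash
# Equivalent explicit compiler selection for a fresh build directory (illustrative only).
$CMAKE_COMMAND $CMAKE_FLAGS \
  -DCMAKE_C_COMPILER=hipcc \
  -DCMAKE_CXX_COMPILER=hipcc \
  ..
make -j VERBOSE=1
```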
@@ -8,6 +8,12 @@ if(USE_CUDA)
   add_definitions(-DGRAPHBOLT_USE_CUDA)
 endif()
+if(USE_HIP)
+  message(STATUS "Build graphbolt with CUDA support")
+  enable_language(HIP)
+  add_definitions(-DGRAPHBOLT_USE_CUDA)
+endif()
 # For windows, define NOMINMAX to avoid conflict with std::min/max
 if(MSVC)
   add_definitions(-DNOMINMAX)
@@ -44,14 +50,15 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
 set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -O0 -g3 -ggdb")
 set(LIB_GRAPHBOLT_NAME "graphbolt_pytorch_${TORCH_VER}")
+# set(LIB_GRAPHBOLT_NAME "graphbolt")
 set(BOLT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
 set(BOLT_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/include")
 file(GLOB BOLT_HEADERS ${BOLT_INCLUDE})
 file(GLOB BOLT_SRC ${BOLT_DIR}/*.cc)
-if(USE_CUDA)
+if(USE_HIP)
   file(GLOB BOLT_CUDA_SRC
-    ${BOLT_DIR}/cuda/*.cu
+    ${BOLT_DIR}/cuda/*.hip
     ${BOLT_DIR}/cuda/*.cc
   )
   list(APPEND BOLT_SRC ${BOLT_CUDA_SRC})
@@ -67,20 +74,28 @@ target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE ${BOLT_DIR}
   "../third_party/pcg/include")
 target_link_libraries(${LIB_GRAPHBOLT_NAME} "${TORCH_LIBRARIES}")
-if(USE_CUDA)
+if(USE_HIP)
-  set_target_properties(${LIB_GRAPHBOLT_NAME} PROPERTIES CUDA_STANDARD 17)
+  # set_target_properties(${LIB_GRAPHBOLT_NAME} PROPERTIES CUDA_STANDARD 17)
   message(STATUS "Use external CCCL library for a consistent API and performance for graphbolt.")
   target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE
-    "../third_party/cccl/thrust"
-    "../third_party/cccl/cub"
-    "../third_party/cccl/libcudacxx/include")
+    # # "/opt/dgl_dep/hipcub-install-0915/include/"
+    # # "/opt/dgl_dep/rocprim-install-0915/include/"
+    # "${ROCM_PATH}/include/thrust"
+    "${ROCM_PATH}/include/hipcub"
+    "${ROCM_PATH}/include/rocprim"
+  )
+  # target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE
+  #   "../third_party/cccl/thrust"
+  #   "../third_party/cccl/cub"
+  #   "../third_party/cccl/libcudacxx/include")
   message(STATUS "Use HugeCTR gpu_cache for graphbolt with INCLUDE_DIRS $ENV{GPU_CACHE_INCLUDE_DIRS}.")
   target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE $ENV{GPU_CACHE_INCLUDE_DIRS})
   target_link_directories(${LIB_GRAPHBOLT_NAME} PRIVATE ${GPU_CACHE_BUILD_DIR})
   target_link_libraries(${LIB_GRAPHBOLT_NAME} gpu_cache)
-  get_property(archs TARGET ${LIB_GRAPHBOLT_NAME} PROPERTY CUDA_ARCHITECTURES)
+  # get_property(archs TARGET ${LIB_GRAPHBOLT_NAME} PROPERTY CUDA_ARCHITECTURES)
   message(STATUS "CUDA_ARCHITECTURES for graphbolt: ${archs}")
 endif()
...
@@ -12,20 +12,27 @@ else
 CPSOURCE=*.so
 fi
-CMAKE_FLAGS="-DCUDA_TOOLKIT_ROOT_DIR=$CUDA_TOOLKIT_ROOT_DIR -DUSE_CUDA=$USE_CUDA -DGPU_CACHE_BUILD_DIR=$BINDIR"
+CMAKE_FLAGS=" -DUSE_HIP=$USE_HIP -DGPU_CACHE_BUILD_DIR=$BINDIR"
 echo $CMAKE_FLAGS
+# add new hipcub
+# export C_INCLUDE_PATH=/opt/dgl_dep/hipcub-install-0915/include/:$C_INCLUDE_PATH
+# export CPLUS_INCLUDE_PATH=/opt/dgl_dep/hipcub-install-0915/include/:$C_INCLUDE_PATH
+# export C_INCLUDE_PATH=/opt/dgl_dep/rocprim-install-0915/include/:$C_INCLUDE_PATH
+# export CPLUS_INCLUDE_PATH=/opt/dgl_dep/rocprim-install-0915/include/:$C_INCLUDE_PATH
 if [ $# -eq 0 ]; then
-  $CMAKE_COMMAND $CMAKE_FLAGS ..
-  make -j
+  CC=hipcc CXX=hipcc $CMAKE_COMMAND $CMAKE_FLAGS ..
+  make -j VERBOSE=1
   cp -v $CPSOURCE $BINDIR/graphbolt
 else
   for PYTHON_INTERP in $@; do
     TORCH_VER=$($PYTHON_INTERP -c 'import torch; print(torch.__version__.split("+")[0])')
     mkdir -p $TORCH_VER
     cd $TORCH_VER
-    $CMAKE_COMMAND $CMAKE_FLAGS -DPYTHON_INTERP=$PYTHON_INTERP ../..
-    make -j
+    CC=hipcc CXX=hipcc $CMAKE_COMMAND $CMAKE_FLAGS -DPYTHON_INTERP=$PYTHON_INTERP ../..
+    make -j VERBOSE=1
     cp -v $CPSOURCE $BINDIR/graphbolt
     cd ..
   done
...
@@ -5,13 +5,14 @@
  * @file cuda/expand_indptr.cu
  * @brief ExpandIndptr operator implementation on CUDA.
  */
+#include <hip/hip_runtime.h>
 #include <thrust/iterator/constant_iterator.h>
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <hipcub/hipcub.hpp>
 #include <limits>
+#include <hipcub/backend/rocprim/device/device_copy.hpp>
 #include "common.h"
 namespace graphbolt {
...
@@ -15,6 +15,7 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 #include <thrust/iterator/transform_output_iterator.h>
+#include <hipcub/backend/rocprim/device/device_copy.hpp>
 #include <algorithm>
 #include <array>
@@ -27,6 +28,29 @@
 #include "common.h"
 #include "utils.h"
+namespace rocprim {
+namespace detail {
+template <>
+struct float_bit_mask<__hip_bfloat16> {
+  static constexpr uint16_t sign_bit = 0x8000;
+  static constexpr uint16_t exponent = 0x7F80;
+  static constexpr uint16_t mantissa = 0x007F;
+  using bit_type = uint16_t;
+};
+template <>
+struct radix_key_codec_base<__hip_bfloat16>
+    : radix_key_codec_floating<__hip_bfloat16, unsigned short> {};
+}  // namespace detail
+}  // namespace rocprim
+__host__ __device__ bool operator>(
+    const __hip_bfloat16& a, const __hip_bfloat16& b) {
+  return float(a) > float(b);
+}
 namespace graphbolt {
 namespace ops {
@@ -344,7 +368,7 @@ c10::intrusive_ptr<sampling::FusedSampledSubgraph> SampleNeighbors(
   CUB_CALL(
       DeviceSegmentedSort::SortKeys, edge_id_segments.get(),
       sorted_edge_id_segments.get(), picked_eids.size(0),
-      num_rows, sub_indptr.data_ptr<indptr_t>(),
+      num_rows, sampled_segment_end_it,
       sampled_segment_end_it);
 }
...
@@ -5,6 +5,7 @@
  * @file cuda/unique_and_compact_impl.cu
  * @brief Unique and compact operator implementation on CUDA.
  */
+#include <hip/hip_runtime.h>
 #include <graphbolt/cuda_ops.h>
 #include <thrust/binary_search.h>
 #include <thrust/functional.h>
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file dgl/array.h
@@ -8,10 +9,10 @@
  */
 #ifndef DGL_ARRAY_H_
 #define DGL_ARRAY_H_
-#include "./aten/array_ops.h"
-#include "./aten/coo.h"
-#include "./aten/csr.h"
-#include "./aten/macro.h"
+#include "aten/array_ops.h"
+#include "aten/coo.h"
+#include "aten/csr.h"
+#include "aten/macro.h"
 #include "./aten/spmat.h"
 #include "./aten/types.h"
 #endif  // DGL_ARRAY_H_
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file dgl/array_iterator.h
@@ -6,11 +7,11 @@
 #ifndef DGL_ARRAY_ITERATOR_H_
 #define DGL_ARRAY_ITERATOR_H_
-#ifdef __CUDA_ARCH__
+#ifdef __HIPCC__
 #define CUB_INLINE __host__ __device__ __forceinline__
 #else
 #define CUB_INLINE inline
-#endif  // __CUDA_ARCH__
+#endif  // __HIP_DEVICE_COMPILE__
 #include <algorithm>
 #include <iterator>
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file dgl/aten/array_ops.h
@@ -15,7 +16,7 @@
 #include <utility>
 #include <vector>
-#include "./types.h"
+#include "types.h"
 namespace dgl {
 namespace aten {
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020-2022 by Contributors
@@ -15,10 +16,10 @@
 #include <utility>
 #include <vector>
-#include "./array_ops.h"
-#include "./macro.h"
-#include "./spmat.h"
-#include "./types.h"
+#include "array_ops.h"
+#include "macro.h"
+#include "spmat.h"
+#include "types.h"
 namespace dgl {
 namespace aten {
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020-2022 by Contributors
  * @file dgl/aten/csr.h
@@ -14,10 +15,10 @@
 #include <utility>
 #include <vector>
-#include "./array_ops.h"
-#include "./macro.h"
-#include "./spmat.h"
-#include "./types.h"
+#include "array_ops.h"
+#include "macro.h"
+#include "spmat.h"
+#include "types.h"
 namespace dgl {
 namespace aten {
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file dgl/aten/macro.h
@@ -47,7 +48,7 @@
 if ((val) == kDGLCPU) { \
 constexpr auto XPU = kDGLCPU; \
 { __VA_ARGS__ } \
-} else if ((val) == kDGLCUDA) { \
+} else if ((val) == kDGLCUDA or (val) == kDGLROCM) { \
 constexpr auto XPU = kDGLCUDA; \
 { __VA_ARGS__ } \
 } else { \
@@ -145,12 +146,12 @@
 typedef double FloatType; \
 { __VA_ARGS__ } \
 } else if ( \
-XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLFloat) { \
+(XPU == kDGLCUDA || XPU == kDGLROCM) && (val).bits == 16 && (val).code == kDGLFloat) { \
 typedef __half FloatType; \
 { __VA_ARGS__ } \
 } else if ( \
-XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLBfloat) { \
+(XPU == kDGLCUDA || XPU == kDGLROCM) && (val).bits == 16 && (val).code == kDGLBfloat) { \
-typedef __nv_bfloat16 FloatType; \
+typedef __hip_bfloat16 FloatType; \
 { __VA_ARGS__ } \
 } else if ( \
 XPU == kDGLCPU && (val).bits == 16 && (val).code == kDGLFloat) { \
@@ -176,11 +177,11 @@
 typedef double FloatType; \
 { __VA_ARGS__ } \
 } else if ( \
-XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLFloat) { \
+(XPU == kDGLCUDA || XPU == kDGLROCM) && (val).bits == 16 && (val).code == kDGLFloat) { \
 typedef __half FloatType; \
 { __VA_ARGS__ } \
 } else if ( \
-XPU == kDGLCUDA && (val).bits == 16 && (val).code == kDGLBfloat) { \
+(XPU == kDGLCUDA || XPU == kDGLROCM) && (val).bits == 16 && (val).code == kDGLBfloat) { \
 LOG(FATAL) << "bfloat16 requires CUDA >= 11.0"; \
 } else if ( \
 XPU == kDGLCPU && (val).bits == 16 && (val).code == kDGLFloat) { \
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file dgl/aten/spmat.h
@@ -10,7 +11,7 @@
 #include <vector>
 #include "../runtime/object.h"
-#include "./types.h"
+#include "types.h"
 namespace dgl {
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019 by Contributors
  * @file dgl/heterograph_interface.h
@@ -13,7 +14,7 @@
 #include <utility>
 #include <vector>
-#include "./runtime/object.h"
+#include "runtime/object.h"
 #include "array.h"
 #include "aten/spmat.h"
 #include "aten/types.h"
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file dgl/aten/bcast.h
@@ -9,7 +10,7 @@
 #include <string>
 #include <vector>
-#include "./runtime/ndarray.h"
+#include "runtime/ndarray.h"
 using namespace dgl::runtime;
 namespace dgl {
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2018 by Contributors
  * @file dgl/graph_interface.h
@@ -12,7 +13,7 @@
 #include <utility>
 #include <vector>
-#include "./runtime/object.h"
+#include "runtime/object.h"
 #include "array.h"
 namespace dgl {
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2020 by Contributors
  * @file dgl/aten/kernel.h
@@ -10,8 +11,8 @@
 #include <utility>
 #include <vector>
-#include "./base_heterograph.h"
-#include "./bcast.h"
+#include "base_heterograph.h"
+#include "bcast.h"
 #include "array.h"
 namespace dgl {
...
+// !!! This is a file automatically generated by hipify!!!
 /**
  * Copyright (c) 2019 by Contributors
  * @file dgl/nodeflow.h
@@ -10,7 +11,7 @@
 #include <string>
 #include <vector>
-#include "./runtime/object.h"
+#include "runtime/object.h"
 #include "graph_interface.h"
 namespace dgl {
...