Commit 097ed7e2 authored by sangwzh
Browse files

update cmake for max blocksize and device judge

parent 833803f3
......@@ -33,6 +33,7 @@ dgl_option(EXTERNAL_METIS_PATH "Path to external metis" OFF)
dgl_option(EXTERNAL_METIS_LIB_PATH "Path to external metis library" OFF)
dgl_option(EXTERNAL_GKLIB_PATH "Path to external gklib" OFF)
# Options for building DGL features: "none," "dev," "dogfood," "release," and
# "all."
# "none" - The feature is OFF for all build types. This is used when
......@@ -275,9 +276,6 @@ file(GLOB_RECURSE DGL_SRC_1
)
# Append the sources collected by the recursive glob (DGL_SRC_1) to the main
# DGL source list.
list(APPEND DGL_SRC ${DGL_SRC_1})
# Create the shared `dgl` library here only when neither USE_HIP nor USE_CUDA is
# enabled; the USE_HIP branch further down calls add_library(dgl ...) itself.
if(NOT USE_HIP AND NOT USE_CUDA)
add_library(dgl SHARED ${DGL_SRC})
endif()
if (NOT MSVC)
file(GLOB_RECURSE DGL_RPC_SRC src/rpc/*.cc)
......@@ -296,6 +294,7 @@ if(USE_HIP)
# Request C++17 for HIP compilation. This overwrites (does not append to) any
# previous value of HIP_HIPCC_FLAGS.
set(HIP_HIPCC_FLAGS "-std=c++17")
# Build the shared `dgl` library from the collected source list.
add_library(dgl SHARED ${DGL_SRC})
# Tell the linker to accept symbols that are defined more than once instead of
# failing the link.
target_link_options(dgl PRIVATE "-Wl,--allow-multiple-definition")
# Pass the per-block thread limit flag to the compiler (presumably the HIP
# default launch bound of 1024 threads — confirm against the compiler docs).
# NOTE(review): PUBLIC also propagates this flag to every target that links
# against dgl; the graphbolt target uses PRIVATE for the same flag — confirm
# that the propagation is intended.
target_compile_options(dgl PUBLIC "--gpu-max-threads-per-block=1024")
# set_target_properties(dgl PROPERTIES LINKER_LANGUAGE hip)
# NOTE(review): keyword-less target_link_libraries signature; adding
# PRIVATE/PUBLIC here requires every other call on `dgl` to use keywords too.
target_link_libraries(dgl ${DGL_LINKER_LIBS})
# Make the headers under include/dgl visible while compiling dgl itself.
target_include_directories(dgl PRIVATE "${CMAKE_SOURCE_DIR}/include/dgl")
......
......@@ -77,18 +77,13 @@ target_link_libraries(${LIB_GRAPHBOLT_NAME} "${TORCH_LIBRARIES}")
if(USE_HIP)
# set_target_properties(${LIB_GRAPHBOLT_NAME} PROPERTIES CUDA_STANDARD 17)
# NOTE(review): this status message mentions CCCL, while the include directories
# added below are the hipcub/rocprim ones under ROCM_PATH — confirm the wording.
message(STATUS "Use external CCCL library for a consistent API and performance for graphbolt.")
# Pass the per-block thread limit flag when compiling graphbolt; PRIVATE keeps
# the flag from propagating to targets that link against this library.
target_compile_options(${LIB_GRAPHBOLT_NAME} PRIVATE "--gpu-max-threads-per-block=1024")
# Add the hipcub and rocprim header directories found under ROCM_PATH. The
# commented-out entries are earlier hard-coded/alternative locations.
target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE
# # "/opt/dgl_dep/hipcub-install-0915/include/"
# # "/opt/dgl_dep/rocprim-install-0915/include/"
# "${ROCM_PATH}/include/thrust"
"${ROCM_PATH}/include/hipcub"
"${ROCM_PATH}/include/rocprim"
)
# target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE
# "../third_party/cccl/thrust"
# "../third_party/cccl/cub"
# "../third_party/cccl/libcudacxx/include")
# The gpu_cache include directories come from the GPU_CACHE_INCLUDE_DIRS
# environment variable, which is read when CMake configures the project.
message(STATUS "Use HugeCTR gpu_cache for graphbolt with INCLUDE_DIRS $ENV{GPU_CACHE_INCLUDE_DIRS}.")
target_include_directories(${LIB_GRAPHBOLT_NAME} PRIVATE $ENV{GPU_CACHE_INCLUDE_DIRS})
......
......@@ -107,7 +107,7 @@ class RangePartition : public NDArrayPartition {
// we have only one CPU context, and can safely copy the array to that.
range_cpu_(range.CopyTo(DGLContext{kDGLCPU, 0})) {
auto ctx = range->ctx;
if (ctx.device_type != kDGLCUDA) {
// Reject the range only when it lives on neither a CUDA nor a ROCm device.
// The two inequality checks must be joined with &&: joined with ||, the
// condition is true for every device type (a value cannot equal both
// constants at once), so the fatal log would fire unconditionally.
if (ctx.device_type != kDGLCUDA && ctx.device_type != kDGLROCM) {
LOG(FATAL) << "The range for an NDArrayPartition is only supported "
" on GPUs. Transfer the range to the target device before "
"creating the partition.";
......
......@@ -74,7 +74,6 @@ class CUDADeviceAPI final : public DeviceAPI {
// Query the properties of the requested device and hand its name back to the
// caller through rv. No diagnostic output is written here: a library-level
// attribute query must not print to stdout on every call.
hipDeviceProp_t props;
CUDA_CALL(hipGetDeviceProperties(&props, ctx.device_id));
*rv = std::string(props.name);
return;
}
case kMaxClockRate: {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment