diff --git a/CMakeLists.txt b/CMakeLists.txt index 5655ba17..1252d1ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,7 +1,7 @@ cmake_minimum_required(VERSION 3.14) # Check support for CUDA/HIP in Cmake -project(composable_kernel) +project(composable_kernel LANGUAGES CXX HIP) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") @@ -41,27 +41,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}") -## OpenMP -if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - # workaround issue hipcc in rocm3.5 cannot find openmp - set(OpenMP_CXX "${CMAKE_CXX_COMPILER}") - set(OpenMP_CXX_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument") - set(OpenMP_CXX_LIB_NAMES "libomp" "libgomp" "libiomp5") - set(OpenMP_libomp_LIBRARY ${OpenMP_CXX_LIB_NAMES}) - set(OpenMP_libgomp_LIBRARY ${OpenMP_CXX_LIB_NAMES}) - set(OpenMP_libiomp5_LIBRARY ${OpenMP_CXX_LIB_NAMES}) -else() - find_package(OpenMP REQUIRED) -endif() - -message("OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}") -message("OpenMP_gomp_LIBRARY: ${OpenMP_gomp_LIBRARY}") -message("OpenMP_pthread_LIBRARY: ${OpenMP_pthread_LIBRARY}") -message("OpenMP_CXX_FLAGS: ${OpenMP_CXX_FLAGS}") - -link_libraries(${OpenMP_gomp_LIBRARY}) -link_libraries(${OpenMP_pthread_LIBRARY}) - ## HIP find_package(HIP REQUIRED) # Override HIP version in config.h, if necessary. @@ -83,8 +62,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH ) message(STATUS "CK_HIP_VERSION_PATCH overriden with ${CK_OVERRIDE_HIP_VERSION_PATCH}") endif() message(STATUS "Build with HIP ${HIP_VERSION}") -link_libraries(hip::device) -add_compile_definitions(__HIP_PLATFORM_HCC__=1) ## tidy include(EnableCompilerWarnings) @@ -263,9 +240,6 @@ rocm_package_setup_component(tests ) add_subdirectory(library) -add_subdirectory(example) -add_subdirectory(test) -add_subdirectory(profiler) #Create an interface target for the include only files and call it "composablekernels" include(CMakePackageConfigHelpers) @@ -291,11 +265,3 @@ rocm_install(FILES set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE") set(CPACK_RPM_PACKAGE_LICENSE "MIT") - -rocm_create_package( - NAME composablekernel - DESCRIPTION "High Performance Composable Kernel for AMD GPUs" - MAINTAINER "MIOpen Kernels Dev Team " - LDCONFIG - HEADER_ONLY -) diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp index 92018aac..2ada620c 100644 --- a/include/ck/ck.hpp +++ b/include/ck/ck.hpp @@ -126,7 +126,9 @@ #define CK_EXPERIMENTAL_USE_MEMCPY_FOR_BIT_CAST 1 // experimental feature: optimize for inter-wave scheduling policy +#ifndef CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING #define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING 0 +#endif #define CK_EXPERIMENTAL_INTER_WAVE_SCHEDULING_MAC_CLUSTERS 1 // hack: have underlying assumption that need to be satsified, otherwise it's a bug diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index c206c4dc..e45fac9d 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -1,7 +1,9 @@ function(add_instance_library INSTANCE_NAME) message("adding instance ${INSTANCE_NAME}") + set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP) add_library(${INSTANCE_NAME} OBJECT ${ARGN}) target_compile_features(${INSTANCE_NAME} PUBLIC) + target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1") set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) clang_tidy_check(${INSTANCE_NAME}) endfunction(add_instance_library INSTANCE_NAME)