Commit 97096c0a authored by Artur Wojcik's avatar Artur Wojcik
Browse files

temp

parent ff24b537
...@@ -56,3 +56,12 @@ _templates/ ...@@ -56,3 +56,12 @@ _templates/
_toc.yml _toc.yml
docBin/ docBin/
_doxygen/ _doxygen/
# JetBrains IDE
.idea/
cmake-build*/
build*/
# Python virtualenv
.venv/
...@@ -4,22 +4,27 @@ if(POLICY CMP0140) ...@@ -4,22 +4,27 @@ if(POLICY CMP0140)
cmake_policy(SET CMP0140 NEW) cmake_policy(SET CMP0140 NEW)
endif() endif()
get_property(_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG)
# This has to be initialized before the project() command appears # This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if( NOT MSVC_IDE AND NOT CMAKE_BUILD_TYPE ) if(_GENERATOR_IS_MULTI_CONFIG)
set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." ) set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;MinSizeRel" CACHE STRING
"Available build types (configurations) on multi-config generators")
else()
set(CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel.")
endif() endif()
# Default installation path # Default installation path
if(WIN32) if(NOT WIN32)
set(CMAKE_INSTALL_PREFIX "/opt/rocm/x86_64-w64-mingw32" CACHE PATH "")
else()
set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "") set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "")
endif() endif()
set(version 1.1.0) set(version 1.1.0)
# Check support for CUDA/HIP in Cmake # Check support for CUDA/HIP in Cmake
project(composable_kernel VERSION ${version}) project(composable_kernel VERSION ${version} LANGUAGES CXX)
include(CTest)
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
...@@ -72,15 +77,15 @@ if(INSTANCES_ONLY) ...@@ -72,15 +77,15 @@ if(INSTANCES_ONLY)
set(CK_ENABLE_INSTANCES_ONLY "ON") set(CK_ENABLE_INSTANCES_ONLY "ON")
endif() endif()
include(getopt)
# CK config file to record supported datatypes, etc. # CK config file to record supported datatypes, etc.
configure_file("${PROJECT_SOURCE_DIR}/include/ck/config.h.in" "${PROJECT_BINARY_DIR}/include/ck/config.h") configure_file(include/ck/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/ck/config.h)
# CK version file to record release version as well as git commit hash # CK version file to record release version as well as git commit hash
find_package(Git REQUIRED) find_package(Git REQUIRED)
execute_process(COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD OUTPUT_VARIABLE COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE) execute_process(COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD OUTPUT_VARIABLE COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE)
configure_file("${PROJECT_SOURCE_DIR}/include/ck/version.h.in" "${PROJECT_BINARY_DIR}/include/ck/version.h") configure_file(include/ck/version.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/ck/version.h)
enable_testing()
set(ROCM_SYMLINK_LIBS OFF) set(ROCM_SYMLINK_LIBS OFF)
find_package(ROCM REQUIRED PATHS /opt/rocm) find_package(ROCM REQUIRED PATHS /opt/rocm)
...@@ -96,7 +101,7 @@ include(TargetFlags) ...@@ -96,7 +101,7 @@ include(TargetFlags)
rocm_setup_version(VERSION ${version}) rocm_setup_version(VERSION ${version})
list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip) list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip "$ENV{ROCM_PATH}" "$ENV{HIP_PATH}")
message("GPU_TARGETS= ${GPU_TARGETS}") message("GPU_TARGETS= ${GPU_TARGETS}")
...@@ -141,7 +146,7 @@ find_package(hip) ...@@ -141,7 +146,7 @@ find_package(hip)
# SWDEV-413293 and https://reviews.llvm.org/D155213 # SWDEV-413293 and https://reviews.llvm.org/D155213
math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR}) * 100000 + ${hip_VERSION_PATCH}") math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR}) * 100000 + ${hip_VERSION_PATCH}")
message("hip_version_flat=${hip_VERSION_FLAT}") message("hip_version_flat=${hip_VERSION_FLAT}")
if(${hip_VERSION_FLAT} GREATER 500723302) if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 500723302)
message("Adding the fno-offload-uniform-block compiler flag") message("Adding the fno-offload-uniform-block compiler flag")
add_compile_options(-fno-offload-uniform-block) add_compile_options(-fno-offload-uniform-block)
endif() endif()
...@@ -167,7 +172,6 @@ find_package(Threads REQUIRED) ...@@ -167,7 +172,6 @@ find_package(Threads REQUIRED)
link_libraries(Threads::Threads) link_libraries(Threads::Threads)
## C++ ## C++
enable_language(CXX)
set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF) set(CMAKE_CXX_EXTENSIONS OFF)
...@@ -434,7 +438,9 @@ if(NOT DEFINED INSTANCES_ONLY) ...@@ -434,7 +438,9 @@ if(NOT DEFINED INSTANCES_ONLY)
PACKAGE_NAME examples PACKAGE_NAME examples
) )
add_subdirectory(example) add_subdirectory(example)
if(BUILD_TESTING)
add_subdirectory(test) add_subdirectory(test)
endif()
rocm_package_setup_component(profiler rocm_package_setup_component(profiler
LIBRARY_NAME composablekernel LIBRARY_NAME composablekernel
......
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
add_library(getopt::getopt INTERFACE IMPORTED GLOBAL)
if(WIN32)
include(FetchContent)
FetchContent_Declare(
getopt
GIT_REPOSITORY https://github.com/apwojcik/getopt.git
GIT_TAG main
SYSTEM
)
set(__build_shared_libs ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "")
FetchContent_MakeAvailable(getopt)
# Restore the old value of BUILD_SHARED_LIBS
set(BUILD_SHARED_LIBS ${__build_shared_libs} CACHE BOOL "Type of libraries to build" FORCE)
FetchContent_GetProperties(getopt)
target_link_libraries(getopt::getopt INTERFACE wingetopt)
target_include_directories(getopt::getopt INTERFACE ${getopt_SOURCE_DIR}/src)
endif()
\ No newline at end of file
include(FetchContent)
set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")
if(GOOGLETEST_DIR)
set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
endif()
message(STATUS "Fetching GoogleTest")
list(APPEND GTEST_CMAKE_CXX_FLAGS
-Wno-undef
-Wno-reserved-identifier
-Wno-global-constructors
-Wno-missing-noreturn
-Wno-disabled-macro-expansion
-Wno-used-but-marked-unused
-Wno-switch-enum
-Wno-zero-as-null-pointer-constant
-Wno-unused-member-function
-Wno-comma
-Wno-old-style-cast
-Wno-deprecated
-Wno-unsafe-buffer-usage
)
message(STATUS "Suppressing googltest warnings with flags: ${GTEST_CMAKE_CXX_FLAGS}")
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG b85864c64758dec007208e56af933fc3f52044ee
)
# Will be necessary for windows build
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
FetchContent_Populate(googletest)
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
target_compile_options(gtest PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gtest_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gmock PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gmock_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
set_target_properties(gtest PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(gtest_main PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(gmock PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(gmock_main PROPERTIES POSITION_INDEPENDENT_CODE ON)
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
include(FetchContent)
set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")
if(GOOGLETEST_DIR)
set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
endif()
set(BUILD_GMOCK OFF CACHE INTERNAL "")
set(INSTALL_GTEST OFF CACHE INTERNAL "")
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG f8d7d77c06936315286eb55f8de22cd23c188571
SYSTEM
)
if(WIN32)
set(gtest_force_shared_crt ON CACHE_INTERNAL "")
endif()
# Store the current value of BUILD_SHARED_LIBS
set(__build_shared_libs ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "")
FetchContent_MakeAvailable(googletest)
# Restore the old value of BUILD_SHARED_LIBS
set(BUILD_SHARED_LIBS ${__build_shared_libs} CACHE BOOL "Type of libraries to build" FORCE)
set(GTEST_CXX_FLAGS
-Wno-undef
-Wno-global-constructors
-Wno-zero-as-null-pointer-constant
-Wno-switch-enum
-Wno-float-equal
-Wno-unused-member-function)
if(WIN32)
list(APPEND GTEST_CXX_FLAGS
-Wno-suggest-destructor-override
-Wno-suggest-override
-Wno-nonportable-system-include-path
-Wno-language-extension-token)
endif()
target_compile_options(gtest PRIVATE ${GTEST_CXX_FLAGS})
target_compile_options(gtest_main PRIVATE ${GTEST_CXX_FLAGS})
...@@ -174,6 +174,11 @@ struct PassThrough ...@@ -174,6 +174,11 @@ struct PassThrough
{ {
y = x; y = x;
} }
template <>
__host__ __device__ void operator()<int4_t, int>(int4_t& y, const int& x) const
{
y = type_convert<int4_t>(x);
}
#endif #endif
template <> template <>
......
...@@ -119,7 +119,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk ...@@ -119,7 +119,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
index_t num_k_block_tile_iteration, index_t num_k_block_tile_iteration,
AccDataType epsilon, AccDataType epsilon,
const InDataTypePointerTuple p_in_global_tuple, const InDataTypePointerTuple p_in_global_tuple,
XDataType* const __restrict__ p_x_lds, XDataType* const __restrict__ p_x_lds_,
const GammaDataType* const __restrict__ p_gamma_global, const GammaDataType* const __restrict__ p_gamma_global,
const BetaDataType* const __restrict__ p_beta_global, const BetaDataType* const __restrict__ p_beta_global,
YDataType* const __restrict__ p_y_global, YDataType* const __restrict__ p_y_global,
...@@ -149,7 +149,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk ...@@ -149,7 +149,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
p_y_global, y_grid_desc_m_k.GetElementSpaceSize()); p_y_global, y_grid_desc_m_k.GetElementSpaceSize());
auto x_lds_val_buf = make_dynamic_buffer<AddressSpaceEnum::Lds>( auto x_lds_val_buf = make_dynamic_buffer<AddressSpaceEnum::Lds>(
p_x_lds, x_grid_desc_m_k.GetElementSpaceSize() / grid_size); p_x_lds_, x_grid_desc_m_k.GetElementSpaceSize() / grid_size);
auto in_thread_buf_tuple = generate_tuple( auto in_thread_buf_tuple = generate_tuple(
[&](auto) { [&](auto) {
......
...@@ -328,7 +328,7 @@ struct WmmaSelector ...@@ -328,7 +328,7 @@ struct WmmaSelector
} }
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
template <> template <>
static constexpr auto GetWmma<int4_t, int, 16, 16>() static constexpr auto GetWmma<int4_t, int4_t, int, 16, 16>()
{ {
return WmmaInstr::wmma_i32_16x16x16_iu4; return WmmaInstr::wmma_i32_16x16x16_iu4;
} }
......
...@@ -152,7 +152,6 @@ ENDFOREACH() ...@@ -152,7 +152,6 @@ ENDFOREACH()
if(CK_DEVICE_OTHER_INSTANCES) if(CK_DEVICE_OTHER_INSTANCES)
add_library(device_other_operations STATIC ${CK_DEVICE_OTHER_INSTANCES}) add_library(device_other_operations STATIC ${CK_DEVICE_OTHER_INSTANCES})
add_library(composablekernels::device_other_operations ALIAS device_other_operations) add_library(composablekernels::device_other_operations ALIAS device_other_operations)
target_compile_features(device_other_operations PUBLIC)
set_target_properties(device_other_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) set_target_properties(device_other_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(device_other_operations PUBLIC target_include_directories(device_other_operations PUBLIC
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck> $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck>
......
set(DEVICE_SOFTMAX_INSTANCES) add_instance_library(device_softmax_instance
list(APPEND DEVICE_SOFTMAX_INSTANCES
device_softmax_f16_f16_instance_rank3_reduce1.cpp device_softmax_f16_f16_instance_rank3_reduce1.cpp
device_softmax_f16_f16_instance_rank3_reduce2.cpp device_softmax_f16_f16_instance_rank3_reduce2.cpp
device_softmax_f16_f16_instance_rank3_reduce3.cpp device_softmax_f16_f16_instance_rank3_reduce3.cpp
...@@ -14,4 +13,3 @@ list(APPEND DEVICE_SOFTMAX_INSTANCES ...@@ -14,4 +13,3 @@ list(APPEND DEVICE_SOFTMAX_INSTANCES
device_softmax_f32_f32_instance_rank4_reduce2.cpp device_softmax_f32_f32_instance_rank4_reduce2.cpp
device_softmax_f32_f32_instance_rank4_reduce3.cpp device_softmax_f32_f32_instance_rank4_reduce3.cpp
device_softmax_f32_f32_instance_rank4_reduce4.cpp) device_softmax_f32_f32_instance_rank4_reduce4.cpp)
add_instance_library(device_softmax_instance ${DEVICE_SOFTMAX_INSTANCES})
## utility add_library(utility STATIC
set(UTILITY_SOURCE
device_memory.cpp device_memory.cpp
host_tensor.cpp host_tensor.cpp
convolution_parameter.cpp convolution_parameter.cpp
) )
add_library(utility STATIC ${UTILITY_SOURCE})
add_library(composable_kernel::utility ALIAS utility) add_library(composable_kernel::utility ALIAS utility)
set_target_properties(utility PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_compile_options(utility PRIVATE ${CMAKE_COMPILER_WARNINGS})
target_include_directories(utility PUBLIC target_include_directories(utility PUBLIC
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck>" "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/utility>" "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/utility>"
) )
if(WIN32)
target_compile_definitions(utility PUBLIC NOMINMAX)
endif()
rocm_install( rocm_install(
TARGETS utility TARGETS utility
......
...@@ -56,7 +56,7 @@ set(PROFILER_EXECUTABLE ckProfiler) ...@@ -56,7 +56,7 @@ set(PROFILER_EXECUTABLE ckProfiler)
add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES}) add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES})
target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors) target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility getopt::getopt)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance)
......
...@@ -3,7 +3,7 @@ include_directories(BEFORE ...@@ -3,7 +3,7 @@ include_directories(BEFORE
${PROJECT_SOURCE_DIR}/profiler/include ${PROJECT_SOURCE_DIR}/profiler/include
) )
include(googletest) include(gtest)
add_custom_target(tests) add_custom_target(tests)
...@@ -50,6 +50,7 @@ function(add_test_executable TEST_NAME) ...@@ -50,6 +50,7 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list #only continue if there are some source files left on the list
if(ARGN) if(ARGN)
add_executable(${TEST_NAME} ${ARGN}) add_executable(${TEST_NAME} ${ARGN})
target_link_libraries(${TEST_NAME} PRIVATE getopt::getopt)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>) add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>)
add_dependencies(tests ${TEST_NAME}) add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME}) add_dependencies(check ${TEST_NAME})
...@@ -58,9 +59,7 @@ function(add_test_executable TEST_NAME) ...@@ -58,9 +59,7 @@ function(add_test_executable TEST_NAME)
endif() endif()
#message("add_test returns ${result}") #message("add_test returns ${result}")
set(result ${result} PARENT_SCOPE) set(result ${result} PARENT_SCOPE)
endfunction(add_test_executable TEST_NAME) endfunction()
include(GoogleTest)
function(add_gtest_executable TEST_NAME) function(add_gtest_executable TEST_NAME)
message("adding gtest ${TEST_NAME}") message("adding gtest ${TEST_NAME}")
...@@ -109,14 +108,14 @@ function(add_gtest_executable TEST_NAME) ...@@ -109,14 +108,14 @@ function(add_gtest_executable TEST_NAME)
# suppress gtest warnings # suppress gtest warnings
target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef) target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef)
target_link_libraries(${TEST_NAME} PRIVATE gtest_main) target_link_libraries(${TEST_NAME} PRIVATE gtest_main getopt::getopt)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>) add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}>)
rocm_install(TARGETS ${TEST_NAME} COMPONENT tests) rocm_install(TARGETS ${TEST_NAME} COMPONENT tests)
set(result 0) set(result 0)
endif() endif()
#message("add_gtest returns ${result}") #message("add_gtest returns ${result}")
set(result ${result} PARENT_SCOPE) set(result ${result} PARENT_SCOPE)
endfunction(add_gtest_executable TEST_NAME) endfunction()
add_subdirectory(magic_number_division) add_subdirectory(magic_number_division)
add_subdirectory(space_filling_curve) add_subdirectory(space_filling_curve)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment