Commit 90f368bb authored by rocking's avatar rocking
Browse files

Reproduce compiler wrong result

parent e48ddb6a
...@@ -7,8 +7,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") ...@@ -7,8 +7,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
enable_testing() enable_testing()
set(ROCM_SYMLINK_LIBS OFF) find_package(ROCM REQUIRED PATHS /opt/rocm)
find_package(ROCM 0.8 REQUIRED PATHS /opt/rocm)
include(ROCMInstallTargets) include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers) include(ROCMPackageConfigHelpers)
...@@ -17,7 +16,7 @@ include(ROCMInstallSymlinks) ...@@ -17,7 +16,7 @@ include(ROCMInstallSymlinks)
include(ROCMCreatePackage) include(ROCMCreatePackage)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
rocm_setup_version(VERSION 0.2.0) rocm_setup_version(VERSION 1.0.0)
include(TargetFlags) include(TargetFlags)
list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip) list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip)
...@@ -71,6 +70,14 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH ) ...@@ -71,6 +70,14 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
endif() endif()
message(STATUS "Build with HIP ${HIP_VERSION}") message(STATUS "Build with HIP ${HIP_VERSION}")
rocm_create_package(
NAME composablekernel
DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
MAINTAINER "MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
LDCONFIG
)
## tidy ## tidy
include(EnableCompilerWarnings) include(EnableCompilerWarnings)
set(CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name) set(CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)
...@@ -231,11 +238,6 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") ...@@ -231,11 +238,6 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR}) add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
rocm_package_setup_component(tests
LIBRARY_NAME composablekernel
PACKAGE_NAME tests # Prevent -static suffix on package name
)
add_subdirectory(library) add_subdirectory(library)
add_subdirectory(example) add_subdirectory(example)
add_subdirectory(test) add_subdirectory(test)
...@@ -257,19 +259,8 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in ...@@ -257,19 +259,8 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
NO_CHECK_REQUIRED_COMPONENTS_MACRO NO_CHECK_REQUIRED_COMPONENTS_MACRO
) )
rocm_install(FILES install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfigVersion.cmake" "${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfigVersion.cmake"
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
) )
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
rocm_create_package(
NAME composablekernel
DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
MAINTAINER "MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
LDCONFIG
HEADER_ONLY
)
...@@ -38,8 +38,8 @@ using DeviceInstance = ck::tensor_operation::device::DeviceLayernorm<XDataType, ...@@ -38,8 +38,8 @@ using DeviceInstance = ck::tensor_operation::device::DeviceLayernorm<XDataType,
Rank, Rank,
NumReduceDim, NumReduceDim,
256, // BlockSize 256, // BlockSize
8, // ClusterM 1, // ClusterM
32, // ClusterK 256, // ClusterK
1, // SliceM 1, // SliceM
8, // SliceK 8, // SliceK
1, // SrcVecDim (0=M, 1=K) 1, // SrcVecDim (0=M, 1=K)
......
...@@ -298,6 +298,13 @@ struct GridwiseLayernorm_mk_to_mk ...@@ -298,6 +298,13 @@ struct GridwiseLayernorm_mk_to_mk
static_for<0, MThreadSliceSize, 1>{}([&](auto I) { static_for<0, MThreadSliceSize, 1>{}([&](auto I) {
BlockwiseSumReduce::Reduce(reduce_work_buf, mean_thread_buf(I)); BlockwiseSumReduce::Reduce(reduce_work_buf, mean_thread_buf(I));
// #define COMPILER_BUG
#ifdef COMPILER_BUG
if(mean_thread_buf(I) < 1)
printf("%f %d\n", mean_thread_buf(I), reduce_length);
#endif
mean_thread_buf(I) = mean_thread_buf(I) / reduce_length; mean_thread_buf(I) = mean_thread_buf(I) / reduce_length;
BlockwiseSumReduce::Reduce(reduce_work_buf, mean_square_thread_buf(I)); BlockwiseSumReduce::Reduce(reduce_work_buf, mean_square_thread_buf(I));
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment