Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
90f368bb
Commit
90f368bb
authored
Jul 11, 2022
by
rocking
Browse files
Reproduce compiler wrong result
parent
e48ddb6a
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
20 additions
and
22 deletions
+20
-22
CMakeLists.txt
CMakeLists.txt
+11
-20
example/27_layernorm/layernorm_blockwise.cpp
example/27_layernorm/layernorm_blockwise.cpp
+2
-2
include/ck/tensor_operation/gpu/grid/gridwise_layernorm.hpp
include/ck/tensor_operation/gpu/grid/gridwise_layernorm.hpp
+7
-0
No files found.
CMakeLists.txt
View file @
90f368bb
...
@@ -7,8 +7,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
...
@@ -7,8 +7,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
enable_testing
()
enable_testing
()
set
(
ROCM_SYMLINK_LIBS OFF
)
find_package
(
ROCM REQUIRED PATHS /opt/rocm
)
find_package
(
ROCM 0.8 REQUIRED PATHS /opt/rocm
)
include
(
ROCMInstallTargets
)
include
(
ROCMInstallTargets
)
include
(
ROCMPackageConfigHelpers
)
include
(
ROCMPackageConfigHelpers
)
...
@@ -17,7 +16,7 @@ include(ROCMInstallSymlinks)
...
@@ -17,7 +16,7 @@ include(ROCMInstallSymlinks)
include
(
ROCMCreatePackage
)
include
(
ROCMCreatePackage
)
include
(
CheckCXXCompilerFlag
)
include
(
CheckCXXCompilerFlag
)
rocm_setup_version
(
VERSION
0.2
.0
)
rocm_setup_version
(
VERSION
1.0
.0
)
include
(
TargetFlags
)
include
(
TargetFlags
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
)
...
@@ -71,6 +70,14 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
...
@@ -71,6 +70,14 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
endif
()
endif
()
message
(
STATUS
"Build with HIP
${
HIP_VERSION
}
"
)
message
(
STATUS
"Build with HIP
${
HIP_VERSION
}
"
)
rocm_create_package
(
NAME composablekernel
DESCRIPTION
"High Performance Composable Kernel for AMD GPUs"
MAINTAINER
"MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
LDCONFIG
)
## tidy
## tidy
include
(
EnableCompilerWarnings
)
include
(
EnableCompilerWarnings
)
set
(
CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name
)
set
(
CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name
)
...
@@ -231,11 +238,6 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
...
@@ -231,11 +238,6 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
add_custom_target
(
check COMMAND
${
CMAKE_CTEST_COMMAND
}
--output-on-failure -C
${
CMAKE_CFG_INTDIR
}
)
add_custom_target
(
check COMMAND
${
CMAKE_CTEST_COMMAND
}
--output-on-failure -C
${
CMAKE_CFG_INTDIR
}
)
rocm_package_setup_component
(
tests
LIBRARY_NAME composablekernel
PACKAGE_NAME tests
# Prevent -static suffix on package name
)
add_subdirectory
(
library
)
add_subdirectory
(
library
)
add_subdirectory
(
example
)
add_subdirectory
(
example
)
add_subdirectory
(
test
)
add_subdirectory
(
test
)
...
@@ -257,19 +259,8 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
...
@@ -257,19 +259,8 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
NO_CHECK_REQUIRED_COMPONENTS_MACRO
NO_CHECK_REQUIRED_COMPONENTS_MACRO
)
)
rocm_
install
(
FILES
install
(
FILES
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfig.cmake"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfig.cmake"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfigVersion.cmake"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/composable_kernelConfigVersion.cmake"
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
DESTINATION
${
CMAKE_INSTALL_LIBDIR
}
/cmake/composable_kernel
)
)
set
(
CPACK_RESOURCE_FILE_LICENSE
"
${
CMAKE_CURRENT_SOURCE_DIR
}
/LICENSE"
)
set
(
CPACK_RPM_PACKAGE_LICENSE
"MIT"
)
rocm_create_package
(
NAME composablekernel
DESCRIPTION
"High Performance Composable Kernel for AMD GPUs"
MAINTAINER
"MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
LDCONFIG
HEADER_ONLY
)
example/27_layernorm/layernorm_blockwise.cpp
View file @
90f368bb
...
@@ -38,8 +38,8 @@ using DeviceInstance = ck::tensor_operation::device::DeviceLayernorm<XDataType,
...
@@ -38,8 +38,8 @@ using DeviceInstance = ck::tensor_operation::device::DeviceLayernorm<XDataType,
Rank
,
Rank
,
NumReduceDim
,
NumReduceDim
,
256
,
// BlockSize
256
,
// BlockSize
8
,
// ClusterM
1
,
// ClusterM
3
2
,
// ClusterK
2
56
,
// ClusterK
1
,
// SliceM
1
,
// SliceM
8
,
// SliceK
8
,
// SliceK
1
,
// SrcVecDim (0=M, 1=K)
1
,
// SrcVecDim (0=M, 1=K)
...
...
include/ck/tensor_operation/gpu/grid/gridwise_layernorm.hpp
View file @
90f368bb
...
@@ -298,6 +298,13 @@ struct GridwiseLayernorm_mk_to_mk
...
@@ -298,6 +298,13 @@ struct GridwiseLayernorm_mk_to_mk
static_for
<
0
,
MThreadSliceSize
,
1
>
{}([
&
](
auto
I
)
{
static_for
<
0
,
MThreadSliceSize
,
1
>
{}([
&
](
auto
I
)
{
BlockwiseSumReduce
::
Reduce
(
reduce_work_buf
,
mean_thread_buf
(
I
));
BlockwiseSumReduce
::
Reduce
(
reduce_work_buf
,
mean_thread_buf
(
I
));
// #define COMPILER_BUG
#ifdef COMPILER_BUG
if
(
mean_thread_buf
(
I
)
<
1
)
printf
(
"%f %d
\n
"
,
mean_thread_buf
(
I
),
reduce_length
);
#endif
mean_thread_buf
(
I
)
=
mean_thread_buf
(
I
)
/
reduce_length
;
mean_thread_buf
(
I
)
=
mean_thread_buf
(
I
)
/
reduce_length
;
BlockwiseSumReduce
::
Reduce
(
reduce_work_buf
,
mean_square_thread_buf
(
I
));
BlockwiseSumReduce
::
Reduce
(
reduce_work_buf
,
mean_square_thread_buf
(
I
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment