Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
97096c0a
Commit
97096c0a
authored
Dec 05, 2023
by
Artur Wojcik
Browse files
temp
parent
ff24b537
Changes
13
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
130 additions
and
82 deletions
+130
-82
.gitignore
.gitignore
+9
-0
CMakeLists.txt
CMakeLists.txt
+20
-14
cmake/getopt.cmake
cmake/getopt.cmake
+28
-0
cmake/googletest.cmake
cmake/googletest.cmake
+0
-50
cmake/gtest.cmake
cmake/gtest.cmake
+52
-0
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
...or_operation/gpu/element/unary_element_wise_operation.hpp
+5
-0
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp
.../grid/gridwise_elementwise_layernorm_welford_variance.hpp
+2
-2
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
+1
-1
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+0
-1
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
.../src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
+1
-3
library/src/utility/CMakeLists.txt
library/src/utility/CMakeLists.txt
+6
-4
profiler/src/CMakeLists.txt
profiler/src/CMakeLists.txt
+1
-1
test/CMakeLists.txt
test/CMakeLists.txt
+5
-6
No files found.
.gitignore
View file @
97096c0a
...
...
@@ -56,3 +56,12 @@ _templates/
_toc.yml
docBin/
_doxygen/
# JetBrains IDE
.idea/
cmake-build*/
build*/
# Python virtualenv
.venv/
CMakeLists.txt
View file @
97096c0a
...
...
@@ -4,22 +4,27 @@ if(POLICY CMP0140)
cmake_policy
(
SET CMP0140 NEW
)
endif
()
get_property
(
_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG
)
# This has to be initialized before the project() command appears
# Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE
if
(
NOT MSVC_IDE AND NOT CMAKE_BUILD_TYPE
)
set
(
CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
)
if
(
_GENERATOR_IS_MULTI_CONFIG
)
set
(
CMAKE_CONFIGURATION_TYPES
"Debug;Release;RelWithDebInfo;MinSizeRel"
CACHE STRING
"Available build types (configurations) on multi-config generators"
)
else
()
set
(
CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel."
)
endif
()
# Default installation path
if
(
WIN32
)
set
(
CMAKE_INSTALL_PREFIX
"/opt/rocm/x86_64-w64-mingw32"
CACHE PATH
""
)
else
()
if
(
NOT WIN32
)
set
(
CMAKE_INSTALL_PREFIX
"/opt/rocm"
CACHE PATH
""
)
endif
()
set
(
version 1.1.0
)
# Check support for CUDA/HIP in Cmake
project
(
composable_kernel VERSION
${
version
}
)
project
(
composable_kernel VERSION
${
version
}
LANGUAGES CXX
)
include
(
CTest
)
list
(
APPEND CMAKE_MODULE_PATH
"
${
PROJECT_SOURCE_DIR
}
/cmake"
)
...
...
@@ -72,15 +77,15 @@ if(INSTANCES_ONLY)
set
(
CK_ENABLE_INSTANCES_ONLY
"ON"
)
endif
()
include
(
getopt
)
# CK config file to record supported datatypes, etc.
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/
include/ck/config.h.in
"
"
${
PROJEC
T_BINARY_DIR
}
/include/ck/config.h
"
)
configure_file
(
include/ck/config.h.in
${
CMAKE_CURREN
T_BINARY_DIR
}
/include/ck/config.h
)
# CK version file to record release version as well as git commit hash
find_package
(
Git REQUIRED
)
execute_process
(
COMMAND
"
${
GIT_EXECUTABLE
}
"
rev-parse HEAD OUTPUT_VARIABLE COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE
)
configure_file
(
"
${
PROJECT_SOURCE_DIR
}
/include/ck/version.h.in"
"
${
PROJECT_BINARY_DIR
}
/include/ck/version.h"
)
enable_testing
()
configure_file
(
include/ck/version.h.in
${
CMAKE_CURRENT_BINARY_DIR
}
/include/ck/version.h
)
set
(
ROCM_SYMLINK_LIBS OFF
)
find_package
(
ROCM REQUIRED PATHS /opt/rocm
)
...
...
@@ -96,7 +101,7 @@ include(TargetFlags)
rocm_setup_version
(
VERSION
${
version
}
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
)
list
(
APPEND CMAKE_PREFIX_PATH
${
CMAKE_INSTALL_PREFIX
}
${
CMAKE_INSTALL_PREFIX
}
/llvm
${
CMAKE_INSTALL_PREFIX
}
/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip
"$ENV{ROCM_PATH}"
"$ENV{HIP_PATH}"
)
message
(
"GPU_TARGETS=
${
GPU_TARGETS
}
"
)
...
...
@@ -141,7 +146,7 @@ find_package(hip)
# SWDEV-413293 and https://reviews.llvm.org/D155213
math
(
EXPR hip_VERSION_FLAT
"(
${
hip_VERSION_MAJOR
}
* 1000 +
${
hip_VERSION_MINOR
}
) * 100000 +
${
hip_VERSION_PATCH
}
"
)
message
(
"hip_version_flat=
${
hip_VERSION_FLAT
}
"
)
if
(
${
hip_VERSION_FLAT
}
GREATER 500723302
)
if
(
NOT WIN32 AND
${
hip_VERSION_FLAT
}
GREATER 500723302
)
message
(
"Adding the fno-offload-uniform-block compiler flag"
)
add_compile_options
(
-fno-offload-uniform-block
)
endif
()
...
...
@@ -167,7 +172,6 @@ find_package(Threads REQUIRED)
link_libraries
(
Threads::Threads
)
## C++
enable_language
(
CXX
)
set
(
CMAKE_CXX_STANDARD 17
)
set
(
CMAKE_CXX_STANDARD_REQUIRED ON
)
set
(
CMAKE_CXX_EXTENSIONS OFF
)
...
...
@@ -434,7 +438,9 @@ if(NOT DEFINED INSTANCES_ONLY)
PACKAGE_NAME examples
)
add_subdirectory
(
example
)
add_subdirectory
(
test
)
if
(
BUILD_TESTING
)
add_subdirectory
(
test
)
endif
()
rocm_package_setup_component
(
profiler
LIBRARY_NAME composablekernel
...
...
cmake/getopt.cmake
0 → 100644
View file @
97096c0a
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
add_library
(
getopt::getopt INTERFACE IMPORTED GLOBAL
)
if
(
WIN32
)
include
(
FetchContent
)
FetchContent_Declare
(
getopt
GIT_REPOSITORY https://github.com/apwojcik/getopt.git
GIT_TAG main
SYSTEM
)
set
(
__build_shared_libs
${
BUILD_SHARED_LIBS
}
)
set
(
BUILD_SHARED_LIBS OFF CACHE INTERNAL
""
)
FetchContent_MakeAvailable
(
getopt
)
# Restore the old value of BUILD_SHARED_LIBS
set
(
BUILD_SHARED_LIBS
${
__build_shared_libs
}
CACHE BOOL
"Type of libraries to build"
FORCE
)
FetchContent_GetProperties
(
getopt
)
target_link_libraries
(
getopt::getopt INTERFACE wingetopt
)
target_include_directories
(
getopt::getopt INTERFACE
${
getopt_SOURCE_DIR
}
/src
)
endif
()
\ No newline at end of file
cmake/googletest.cmake
deleted
100644 → 0
View file @
ff24b537
include
(
FetchContent
)
set
(
GOOGLETEST_DIR
""
CACHE STRING
"Location of local GoogleTest repo to build against"
)
if
(
GOOGLETEST_DIR
)
set
(
FETCHCONTENT_SOURCE_DIR_GOOGLETEST
${
GOOGLETEST_DIR
}
CACHE STRING
"GoogleTest source directory override"
)
endif
()
message
(
STATUS
"Fetching GoogleTest"
)
list
(
APPEND GTEST_CMAKE_CXX_FLAGS
-Wno-undef
-Wno-reserved-identifier
-Wno-global-constructors
-Wno-missing-noreturn
-Wno-disabled-macro-expansion
-Wno-used-but-marked-unused
-Wno-switch-enum
-Wno-zero-as-null-pointer-constant
-Wno-unused-member-function
-Wno-comma
-Wno-old-style-cast
-Wno-deprecated
-Wno-unsafe-buffer-usage
)
message
(
STATUS
"Suppressing googltest warnings with flags:
${
GTEST_CMAKE_CXX_FLAGS
}
"
)
FetchContent_Declare
(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG b85864c64758dec007208e56af933fc3f52044ee
)
# Will be necessary for windows build
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_GetProperties
(
googletest
)
if
(
NOT googletest_POPULATED
)
FetchContent_Populate
(
googletest
)
add_subdirectory
(
${
googletest_SOURCE_DIR
}
${
googletest_BINARY_DIR
}
EXCLUDE_FROM_ALL
)
endif
()
target_compile_options
(
gtest PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
target_compile_options
(
gtest_main PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
target_compile_options
(
gmock PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
target_compile_options
(
gmock_main PRIVATE
${
GTEST_CMAKE_CXX_FLAGS
}
)
set_target_properties
(
gtest PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
gtest_main PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
gmock PROPERTIES POSITION_INDEPENDENT_CODE ON
)
set_target_properties
(
gmock_main PROPERTIES POSITION_INDEPENDENT_CODE ON
)
cmake/gtest.cmake
0 → 100644
View file @
97096c0a
# SPDX-License-Identifier: MIT
# Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
include
(
FetchContent
)
set
(
GOOGLETEST_DIR
""
CACHE STRING
"Location of local GoogleTest repo to build against"
)
if
(
GOOGLETEST_DIR
)
set
(
FETCHCONTENT_SOURCE_DIR_GOOGLETEST
${
GOOGLETEST_DIR
}
CACHE STRING
"GoogleTest source directory override"
)
endif
()
set
(
BUILD_GMOCK OFF CACHE INTERNAL
""
)
set
(
INSTALL_GTEST OFF CACHE INTERNAL
""
)
FetchContent_Declare
(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG f8d7d77c06936315286eb55f8de22cd23c188571
SYSTEM
)
if
(
WIN32
)
set
(
gtest_force_shared_crt ON CACHE_INTERNAL
""
)
endif
()
# Store the current value of BUILD_SHARED_LIBS
set
(
__build_shared_libs
${
BUILD_SHARED_LIBS
}
)
set
(
BUILD_SHARED_LIBS OFF CACHE INTERNAL
""
)
FetchContent_MakeAvailable
(
googletest
)
# Restore the old value of BUILD_SHARED_LIBS
set
(
BUILD_SHARED_LIBS
${
__build_shared_libs
}
CACHE BOOL
"Type of libraries to build"
FORCE
)
set
(
GTEST_CXX_FLAGS
-Wno-undef
-Wno-global-constructors
-Wno-zero-as-null-pointer-constant
-Wno-switch-enum
-Wno-float-equal
-Wno-unused-member-function
)
if
(
WIN32
)
list
(
APPEND GTEST_CXX_FLAGS
-Wno-suggest-destructor-override
-Wno-suggest-override
-Wno-nonportable-system-include-path
-Wno-language-extension-token
)
endif
()
target_compile_options
(
gtest PRIVATE
${
GTEST_CXX_FLAGS
}
)
target_compile_options
(
gtest_main PRIVATE
${
GTEST_CXX_FLAGS
}
)
include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp
View file @
97096c0a
...
...
@@ -174,6 +174,11 @@ struct PassThrough
{
y
=
x
;
}
template
<
>
__host__
__device__
void
operator
()
<
int4_t
,
int
>
(
int4_t
&
y
,
const
int
&
x
)
const
{
y
=
type_convert
<
int4_t
>
(
x
);
}
#endif
template
<
>
...
...
include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp
View file @
97096c0a
...
...
@@ -119,7 +119,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
index_t
num_k_block_tile_iteration
,
AccDataType
epsilon
,
const
InDataTypePointerTuple
p_in_global_tuple
,
XDataType
*
const
__restrict__
p_x_lds
,
XDataType
*
const
__restrict__
p_x_lds
_
,
const
GammaDataType
*
const
__restrict__
p_gamma_global
,
const
BetaDataType
*
const
__restrict__
p_beta_global
,
YDataType
*
const
__restrict__
p_y_global
,
...
...
@@ -149,7 +149,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk
p_y_global
,
y_grid_desc_m_k
.
GetElementSpaceSize
());
auto
x_lds_val_buf
=
make_dynamic_buffer
<
AddressSpaceEnum
::
Lds
>
(
p_x_lds
,
x_grid_desc_m_k
.
GetElementSpaceSize
()
/
grid_size
);
p_x_lds
_
,
x_grid_desc_m_k
.
GetElementSpaceSize
()
/
grid_size
);
auto
in_thread_buf_tuple
=
generate_tuple
(
[
&
](
auto
)
{
...
...
include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp
View file @
97096c0a
...
...
@@ -328,7 +328,7 @@ struct WmmaSelector
}
#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
template
<
>
static
constexpr
auto
GetWmma
<
int4_t
,
int
,
16
,
16
>
()
static
constexpr
auto
GetWmma
<
int4_t
,
int4_t
,
int
,
16
,
16
>
()
{
return
WmmaInstr
::
wmma_i32_16x16x16_iu4
;
}
...
...
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
97096c0a
...
...
@@ -152,7 +152,6 @@ ENDFOREACH()
if
(
CK_DEVICE_OTHER_INSTANCES
)
add_library
(
device_other_operations STATIC
${
CK_DEVICE_OTHER_INSTANCES
}
)
add_library
(
composablekernels::device_other_operations ALIAS device_other_operations
)
target_compile_features
(
device_other_operations PUBLIC
)
set_target_properties
(
device_other_operations PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_include_directories
(
device_other_operations PUBLIC
$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>
...
...
library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt
View file @
97096c0a
set
(
DEVICE_SOFTMAX_INSTANCES
)
list
(
APPEND DEVICE_SOFTMAX_INSTANCES
add_instance_library
(
device_softmax_instance
device_softmax_f16_f16_instance_rank3_reduce1.cpp
device_softmax_f16_f16_instance_rank3_reduce2.cpp
device_softmax_f16_f16_instance_rank3_reduce3.cpp
...
...
@@ -14,4 +13,3 @@ list(APPEND DEVICE_SOFTMAX_INSTANCES
device_softmax_f32_f32_instance_rank4_reduce2.cpp
device_softmax_f32_f32_instance_rank4_reduce3.cpp
device_softmax_f32_f32_instance_rank4_reduce4.cpp
)
add_instance_library
(
device_softmax_instance
${
DEVICE_SOFTMAX_INSTANCES
}
)
library/src/utility/CMakeLists.txt
View file @
97096c0a
## utility
set
(
UTILITY_SOURCE
add_library
(
utility STATIC
device_memory.cpp
host_tensor.cpp
convolution_parameter.cpp
)
add_library
(
utility STATIC
${
UTILITY_SOURCE
}
)
add_library
(
composable_kernel::utility ALIAS utility
)
set_target_properties
(
utility PROPERTIES POSITION_INDEPENDENT_CODE ON
)
target_compile_options
(
utility PRIVATE
${
CMAKE_COMPILER_WARNINGS
}
)
target_include_directories
(
utility PUBLIC
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck>"
"$<INSTALL_INTERFACE:
${
CMAKE_INSTALL_INCLUDEDIR
}
/ck/library/utility>"
)
if
(
WIN32
)
target_compile_definitions
(
utility PUBLIC NOMINMAX
)
endif
()
rocm_install
(
TARGETS utility
...
...
profiler/src/CMakeLists.txt
View file @
97096c0a
...
...
@@ -56,7 +56,7 @@ set(PROFILER_EXECUTABLE ckProfiler)
add_executable
(
${
PROFILER_EXECUTABLE
}
${
PROFILER_SOURCES
}
)
target_compile_options
(
${
PROFILER_EXECUTABLE
}
PRIVATE -Wno-global-constructors
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE utility
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE utility
getopt::getopt
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_splitk_instance
)
target_link_libraries
(
${
PROFILER_EXECUTABLE
}
PRIVATE device_gemm_add_multiply_instance
)
...
...
test/CMakeLists.txt
View file @
97096c0a
...
...
@@ -3,7 +3,7 @@ include_directories(BEFORE
${
PROJECT_SOURCE_DIR
}
/profiler/include
)
include
(
g
oogle
test
)
include
(
gtest
)
add_custom_target
(
tests
)
...
...
@@ -50,6 +50,7 @@ function(add_test_executable TEST_NAME)
#only continue if there are some source files left on the list
if
(
ARGN
)
add_executable
(
${
TEST_NAME
}
${
ARGN
}
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE getopt::getopt
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
>
)
add_dependencies
(
tests
${
TEST_NAME
}
)
add_dependencies
(
check
${
TEST_NAME
}
)
...
...
@@ -58,9 +59,7 @@ function(add_test_executable TEST_NAME)
endif
()
#message("add_test returns ${result}")
set
(
result
${
result
}
PARENT_SCOPE
)
endfunction
(
add_test_executable TEST_NAME
)
include
(
GoogleTest
)
endfunction
()
function
(
add_gtest_executable TEST_NAME
)
message
(
"adding gtest
${
TEST_NAME
}
"
)
...
...
@@ -109,14 +108,14 @@ function(add_gtest_executable TEST_NAME)
# suppress gtest warnings
target_compile_options
(
${
TEST_NAME
}
PRIVATE -Wno-global-constructors -Wno-undef
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE gtest_main
)
target_link_libraries
(
${
TEST_NAME
}
PRIVATE gtest_main
getopt::getopt
)
add_test
(
NAME
${
TEST_NAME
}
COMMAND $<TARGET_FILE:
${
TEST_NAME
}
>
)
rocm_install
(
TARGETS
${
TEST_NAME
}
COMPONENT tests
)
set
(
result 0
)
endif
()
#message("add_gtest returns ${result}")
set
(
result
${
result
}
PARENT_SCOPE
)
endfunction
(
add_gtest_executable TEST_NAME
)
endfunction
()
add_subdirectory
(
magic_number_division
)
add_subdirectory
(
space_filling_curve
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment