Unverified Commit 840c5397 authored by bibek's avatar bibek Committed by GitHub
Browse files

adding mha as static lib (#1366)



* adding mha as static lib

* add fmha fwd compile options

* typo

* fix python version

* python version to 3

* increase path length

* add max path flag in mha cmake

* fix long path issue

* mha currently only runs in gfx94x

* only buld mha in mi300

* populate gpu_list

* add mha compile flags

* avoid building mha in gpu other then gfx94x

* some comments and  include ck_tile in rocm

* use rocm_install

* place ck_tile in include

* correct ck_tile path

---------
Co-authored-by: default avatarIllia Silin <98187287+illsilin@users.noreply.github.com>
parent b74d4d4d
......@@ -64,6 +64,13 @@ function(add_instance_library INSTANCE_NAME)
list(REMOVE_ITEM ARGN "${source}")
endif()
endforeach()
# Do not build mha instances if gfx94 targets are not on the target list
foreach(source IN LISTS ARGN)
if(NOT INST_TARGETS MATCHES "gfx94" AND source MATCHES "mha")
message("removing mha instance ${source} ")
list(REMOVE_ITEM ARGN "${source}")
endif()
endforeach()
#only continue if there are some source files left on the list
if(ARGN)
set(INST_OBJ)
......@@ -77,6 +84,8 @@ function(add_instance_library INSTANCE_NAME)
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103)
elseif(ARGN MATCHES "_wmma")
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030)
elseif(ARGN MATCHES "mha")
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103)
endif()
set(offload_targets)
foreach(target IN LISTS INST_TARGETS)
......@@ -86,6 +95,21 @@ function(add_instance_library INSTANCE_NAME)
list(APPEND INST_OBJ ${source})
endforeach()
add_library(${INSTANCE_NAME} OBJECT ${INST_OBJ})
# Allow comparing floating points directly in order to check sentinel values
if(${INSTANCE_NAME} STREQUAL "device_mha_instance")
if(NOT DEFINED FMHA_FWD_FAST_EXP2)
set(FMHA_FWD_FAST_EXP2 true)
endif()
if(FMHA_FWD_FAST_EXP2)
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
else()
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
endif()
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal)
target_compile_options(device_mha_instance PRIVATE ${EXAMPLE_FMHA_FWD_COMPILE_OPTIONS})
endif()
target_compile_features(${INSTANCE_NAME} PUBLIC)
# flags to compress the library
......@@ -293,20 +317,22 @@ if(CK_DEVICE_CONV_INSTANCES)
)
endif()
if(CK_DEVICE_MHA_INSTANCES)
add_library(device_mha_operations STATIC ${CK_DEVICE_MHA_INSTANCES})
add_library(composablekernels::device_mha_operations ALIAS device_mha_operations)
target_compile_features(device_mha_operations PUBLIC)
set_target_properties(device_mha_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(device_mha_operations PUBLIC
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/mha>
)
rocm_install(TARGETS device_mha_operations
EXPORT device_mha_operationsTargets)
rocm_install(EXPORT device_mha_operationsTargets
FILE composable_kerneldevice_mha_operationsTargets.cmake
NAMESPACE composable_kernel::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
)
set(gpu_list ${INST_TARGETS})
list(FILTER gpu_list INCLUDE REGEX "^gfx94")
if(gpu_list)
add_library(device_mha_operations STATIC ${CK_DEVICE_MHA_INSTANCES})
add_library(composablekernels::device_mha_operations ALIAS device_mha_operations)
target_compile_features(device_mha_operations PUBLIC)
set_target_properties(device_mha_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
rocm_install(TARGETS device_mha_operations
EXPORT device_mha_operationsTargets)
rocm_install(EXPORT device_mha_operationsTargets
FILE composable_kerneldevice_mha_operationsTargets.cmake
NAMESPACE composable_kernel::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
)
endif()
endif()
if(CK_DEVICE_CONTRACTION_INSTANCES)
add_library(device_contraction_operations STATIC ${CK_DEVICE_CONTRACTION_INSTANCES})
......
set(FMHA_CPP_FOLDER ${CMAKE_CURRENT_BINARY_DIR})
set(FMHA_SRC_FOLDER ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/)
set(CK_TILE_SRC_FOLDER ${CMAKE_SOURCE_DIR}/include/ck_tile/)
# python stuff
find_package(PythonInterp 3 REQUIRED)
rocm_install(DIRECTORY ${CK_TILE_SRC_FOLDER} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck_tile)
rocm_install(FILES
"${FMHA_SRC_FOLDER}/fmha_fwd.hpp"
"${FMHA_SRC_FOLDER}/bias.hpp"
"${FMHA_SRC_FOLDER}/mask.hpp"
DESTINATION include/ck_tile/ops
)
# header for building lib
file(COPY ${FMHA_SRC_FOLDER}/fmha_fwd.hpp DESTINATION ${FMHA_CPP_FOLDER})
file(COPY ${FMHA_SRC_FOLDER}/bias.hpp DESTINATION ${FMHA_CPP_FOLDER})
file(COPY ${FMHA_SRC_FOLDER}/mask.hpp DESTINATION ${FMHA_CPP_FOLDER})
# generate a list of kernels, but not actually emit files at config stage
execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
--list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
)
file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_FWD_GEN_BLOBS)
# actually generate the cpp files
add_custom_command(
OUTPUT ${FMHA_FWD_GEN_BLOBS}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
--output_dir ${FMHA_CPP_FOLDER}
COMMENT "Generating mha kernel (cpp) files now ..."
VERBATIM
)
# This is done to remove path info and just
# have filename. Since, it was cauing the cmake
# to throw "File name too long"
set(device_files)
foreach(filepath IN LISTS FMHA_FWD_GEN_BLOBS)
get_filename_component(filename ${filepath} NAME)
# Append the filename to the device_files list
list(APPEND device_files ${filename})
endforeach()
add_custom_target(generate_cpp_files DEPENDS ${FMHA_FWD_GEN_BLOBS})
add_instance_library(device_mha_instance ${device_files})
if (TARGET device_mha_instance)
add_dependencies(device_mha_instance generate_cpp_files)
endif()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment