Unverified Commit 71828202 authored by arai713's avatar arai713 Committed by GitHub
Browse files

Merge branch 'develop' into codegen_build

parents 0dcbaffb 3528a523
...@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME) ...@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME)
set(FMHA_FWD_FAST_EXP2 true) set(FMHA_FWD_FAST_EXP2 true)
endif() endif()
if(FMHA_FWD_FAST_EXP2) if(FMHA_FWD_FAST_EXP2)
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero) list(APPEND FMHA_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
else() else()
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0) list(APPEND FMHA_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
endif() endif()
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal) list(APPEND FMHA_COMPILE_OPTIONS -Wno-float-equal)
target_compile_options(device_mha_instance PRIVATE ${EXAMPLE_FMHA_FWD_COMPILE_OPTIONS}) list(APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_SPLITKV_API=1)
list(APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_APPENDKV_API=1)
target_compile_options(device_mha_instance PRIVATE ${FMHA_COMPILE_OPTIONS})
endif() endif()
target_compile_features(${INSTANCE_NAME} PUBLIC) target_compile_features(${INSTANCE_NAME} PUBLIC)
......
...@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt) ...@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt) file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt)
endif() endif()
set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
# generate a list of kernels, but not actually emit files at config stage # generate a list of kernels, but not actually emit files at config stage
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
execute_process( execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
--list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt --list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
--api ${FMHA_KNOWN_APIS}
--receipt 3
RESULT_VARIABLE ret RESULT_VARIABLE ret
) )
if(ret AND NOT ret EQUAL 0) if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile MHA FAILED to genrate a list of kernels via Python.") message( FATAL_ERROR "CK Tile MHA FAILED to genrate a list of kernels via Python.")
else() else()
file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_FWD_GEN_BLOBS) file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_GEN_BLOBS)
endif() endif()
# actually generate the kernel content now # actually generate the kernel content now
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
add_custom_command( add_custom_command(
OUTPUT ${FMHA_FWD_GEN_BLOBS} OUTPUT ${FMHA_GEN_BLOBS}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
--output_dir ${FMHA_CPP_FOLDER} --output_dir ${FMHA_CPP_FOLDER}
--api ${FMHA_KNOWN_APIS}
--receipt 3
COMMENT "Generating mha kernel (cpp) files now ..." COMMENT "Generating mha kernel (cpp) files now ..."
VERBATIM VERBATIM
) )
...@@ -57,12 +67,12 @@ add_custom_command( ...@@ -57,12 +67,12 @@ add_custom_command(
# have filename. Since, it was causing the cmake # have filename. Since, it was causing the cmake
# to throw "File name too long" # to throw "File name too long"
set(device_files) set(device_files)
foreach(filepath IN LISTS FMHA_FWD_GEN_BLOBS) foreach(filepath IN LISTS FMHA_GEN_BLOBS)
get_filename_component(filename ${filepath} NAME) get_filename_component(filename ${filepath} NAME)
# Append the filename to the device_files list # Append the filename to the device_files list
list(APPEND device_files ${filename}) list(APPEND device_files ${filename})
endforeach() endforeach()
add_custom_target(generate_cpp_files DEPENDS ${FMHA_FWD_GEN_BLOBS}) add_custom_target(generate_cpp_files DEPENDS ${FMHA_GEN_BLOBS})
add_instance_library(device_mha_instance ${device_files}) add_instance_library(device_mha_instance ${device_files})
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment