Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
71828202
Unverified
Commit
71828202
authored
Sep 24, 2024
by
arai713
Committed by
GitHub
Sep 24, 2024
Browse files
Merge branch 'develop' into codegen_build
parents
0dcbaffb
3528a523
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
22 additions
and
10 deletions
+22
-10
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+6
-4
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
+16
-6
No files found.
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
71828202
...
@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME)
...
@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME)
set
(
FMHA_FWD_FAST_EXP2 true
)
set
(
FMHA_FWD_FAST_EXP2 true
)
endif
()
endif
()
if
(
FMHA_FWD_FAST_EXP2
)
if
(
FMHA_FWD_FAST_EXP2
)
list
(
APPEND
EXAMPLE_FMHA_FWD
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero
)
list
(
APPEND
FMHA
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero
)
else
()
else
()
list
(
APPEND
EXAMPLE_FMHA_FWD
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0
)
list
(
APPEND
FMHA
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0
)
endif
()
endif
()
list
(
APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal
)
list
(
APPEND FMHA_COMPILE_OPTIONS -Wno-float-equal
)
target_compile_options
(
device_mha_instance PRIVATE
${
EXAMPLE_FMHA_FWD_COMPILE_OPTIONS
}
)
list
(
APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_SPLITKV_API=1
)
list
(
APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_APPENDKV_API=1
)
target_compile_options
(
device_mha_instance PRIVATE
${
FMHA_COMPILE_OPTIONS
}
)
endif
()
endif
()
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
...
...
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
View file @
71828202
...
@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
...
@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
file
(
REMOVE
${
FMHA_CPP_FOLDER
}
/blob_list.txt
)
file
(
REMOVE
${
FMHA_CPP_FOLDER
}
/blob_list.txt
)
endif
()
endif
()
set
(
FMHA_KNOWN_APIS
"fwd,fwd_splitkv,fwd_appendkv,bwd"
)
# generate a list of kernels, but not actually emit files at config stage
# generate a list of kernels, but not actually emit files at config stage
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
execute_process
(
execute_process
(
COMMAND
${
PYTHON_EXECUTABLE
}
${
CMAKE_SOURCE_DIR
}
/example/ck_tile/01_fmha
/generate.py
COMMAND
${
PYTHON_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
--list_blobs
${
FMHA_CPP_FOLDER
}
/blob_list.txt
--list_blobs
${
FMHA_CPP_FOLDER
}
/blob_list.txt
--api
${
FMHA_KNOWN_APIS
}
--receipt 3
RESULT_VARIABLE ret
RESULT_VARIABLE ret
)
)
if
(
ret AND NOT ret EQUAL 0
)
if
(
ret AND NOT ret EQUAL 0
)
message
(
FATAL_ERROR
"CK Tile MHA FAILED to genrate a list of kernels via Python."
)
message
(
FATAL_ERROR
"CK Tile MHA FAILED to genrate a list of kernels via Python."
)
else
()
else
()
file
(
STRINGS
${
FMHA_CPP_FOLDER
}
/blob_list.txt FMHA_
FWD_
GEN_BLOBS
)
file
(
STRINGS
${
FMHA_CPP_FOLDER
}
/blob_list.txt FMHA_GEN_BLOBS
)
endif
()
endif
()
# actually generate the kernel content now
# actually generate the kernel content now
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
add_custom_command
(
add_custom_command
(
OUTPUT
${
FMHA_
FWD_
GEN_BLOBS
}
OUTPUT
${
FMHA_GEN_BLOBS
}
COMMAND
${
PYTHON_EXECUTABLE
}
${
CMAKE_SOURCE_DIR
}
/example/ck_tile/01_fmha
/generate.py
COMMAND
${
PYTHON_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
--output_dir
${
FMHA_CPP_FOLDER
}
--output_dir
${
FMHA_CPP_FOLDER
}
--api
${
FMHA_KNOWN_APIS
}
--receipt 3
COMMENT
"Generating mha kernel (cpp) files now ..."
COMMENT
"Generating mha kernel (cpp) files now ..."
VERBATIM
VERBATIM
)
)
...
@@ -57,12 +67,12 @@ add_custom_command(
...
@@ -57,12 +67,12 @@ add_custom_command(
# have filename. Since, it was cauing the cmake
# have filename. Since, it was cauing the cmake
# to throw "File name too long"
# to throw "File name too long"
set
(
device_files
)
set
(
device_files
)
foreach
(
filepath IN LISTS FMHA_
FWD_
GEN_BLOBS
)
foreach
(
filepath IN LISTS FMHA_GEN_BLOBS
)
get_filename_component
(
filename
${
filepath
}
NAME
)
get_filename_component
(
filename
${
filepath
}
NAME
)
# Append the filename to the device_files list
# Append the filename to the device_files list
list
(
APPEND device_files
${
filename
}
)
list
(
APPEND device_files
${
filename
}
)
endforeach
()
endforeach
()
add_custom_target
(
generate_cpp_files DEPENDS
${
FMHA_
FWD_
GEN_BLOBS
}
)
add_custom_target
(
generate_cpp_files DEPENDS
${
FMHA_GEN_BLOBS
}
)
add_instance_library
(
device_mha_instance
${
device_files
}
)
add_instance_library
(
device_mha_instance
${
device_files
}
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment