Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
75b09986
Unverified
Commit
75b09986
authored
Sep 25, 2024
by
Po Yen Chen
Committed by
GitHub
Sep 25, 2024
Browse files
Merge branch 'develop' into ck_tile/fav3_fwd_sept
parents
f0ea8b9e
3528a523
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
114 additions
and
29 deletions
+114
-29
Jenkinsfile
Jenkinsfile
+92
-19
library/src/tensor_operation_instance/gpu/CMakeLists.txt
library/src/tensor_operation_instance/gpu/CMakeLists.txt
+6
-4
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
+16
-6
No files found.
Jenkinsfile
View file @
75b09986
...
...
@@ -100,7 +100,15 @@ def getDockerImage(Map conf=[:]){
dockerArgs
=
dockerArgs
+
" --no-cache "
}
echo
"Docker Args: ${dockerArgs}"
def
image
=
getDockerImageName
()
def
image
if
(
params
.
BUILD_LEGACY_OS
&&
conf
.
get
(
"docker_name"
,
""
)
!=
""
){
image
=
conf
.
get
(
"docker_name"
,
""
)
echo
"Using legacy docker: ${image}"
}
else
{
image
=
getDockerImageName
()
echo
"Using default docker: ${image}"
}
//Check if image exists
def
retimage
try
...
...
@@ -125,7 +133,9 @@ def buildDocker(install_prefix){
def
image_name
=
getDockerImageName
()
echo
"Building Docker for ${image_name}"
def
dockerArgs
=
"--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' --build-arg DISABLE_CACHE='git rev-parse ${params.COMPILER_VERSION}' "
if
(
params
.
COMPILER_VERSION
==
"amd-staging"
||
params
.
COMPILER_VERSION
==
"amd-mainline-open"
||
params
.
COMPILER_COMMIT
!=
""
){
dockerArgs
=
dockerArgs
+
" --no-cache "
}
echo
"Build Args: ${dockerArgs}"
try
{
if
(
params
.
BUILD_DOCKER
){
...
...
@@ -259,6 +269,7 @@ def cmake_build(Map conf=[:]){
"""
)
sh
cmd3
}
// reduce parallelism when compiling, clang uses too much memory
def
nt
=
nthreads
()
def
cmd
...
...
@@ -273,7 +284,7 @@ def cmake_build(Map conf=[:]){
}
else
{
setup_cmd
=
conf
.
get
(
"setup_cmd"
,
"${cmake_envs} cmake ${setup_args} .. "
)
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs}
dumb-init
make -j${nt} ${config_targets}"
)
build_cmd
=
conf
.
get
(
"build_cmd"
,
"${build_envs} make -j${nt} ${config_targets}"
)
}
cmd
=
conf
.
get
(
"cmd"
,
"""
${setup_cmd}
...
...
@@ -292,8 +303,8 @@ def cmake_build(Map conf=[:]){
dir
(
"build"
){
//build CK
sh
cmd
//run tests
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)){
//run tests
except when NO_CK_BUILD or BUILD_LEGACY_OS are set
if
(!
setup_args
.
contains
(
"NO_CK_BUILD"
)
&&
!
params
.
BUILD_LEGACY_OS
){
if
(
setup_args
.
contains
(
"gfx90a"
)
&&
params
.
NINJA_BUILD_TRACE
){
sh
"/ninjatracing/ninjatracing .ninja_log > ck_build_trace.json"
archiveArtifacts
"ck_build_trace.json"
...
...
@@ -330,7 +341,15 @@ def buildHipClangJob(Map conf=[:]){
env
.
HSA_ENABLE_SDMA
=
0
checkout
scm
def
image
=
getDockerImageName
()
def
image
if
(
params
.
BUILD_LEGACY_OS
&&
conf
.
get
(
"docker_name"
,
""
)
!=
""
){
image
=
conf
.
get
(
"docker_name"
,
""
)
echo
"Using legacy docker: ${image}"
}
else
{
image
=
getDockerImageName
()
echo
"Using default docker: ${image}"
}
def
prefixpath
=
conf
.
get
(
"prefixpath"
,
"/opt/rocm"
)
// Jenkins is complaining about the render group
...
...
@@ -512,7 +531,16 @@ def Build_CK(Map conf=[:]){
env
.
DOCKER_BUILDKIT
=
1
checkout
scm
def
image
=
getDockerImageName
()
def
image
if
(
params
.
BUILD_LEGACY_OS
&&
conf
.
get
(
"docker_name"
,
""
)
!=
""
){
image
=
conf
.
get
(
"docker_name"
,
""
)
echo
"Using legacy docker: ${image}"
}
else
{
image
=
getDockerImageName
()
echo
"Using default docker: ${image}"
}
def
prefixpath
=
conf
.
get
(
"prefixpath"
,
"/opt/rocm"
)
// Jenkins is complaining about the render group
...
...
@@ -524,6 +552,9 @@ def Build_CK(Map conf=[:]){
if
(
params
.
COMPILER_VERSION
==
"amd-staging"
||
params
.
COMPILER_VERSION
==
"amd-mainline-open"
||
params
.
COMPILER_COMMIT
!=
""
){
dockerOpts
=
dockerOpts
+
" --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
if
(
params
.
BUILD_LEGACY_OS
){
dockerOpts
=
dockerOpts
+
" --env LD_LIBRARY_PATH='/opt/Python-3.8.13/lib' "
}
def
video_id
=
sh
(
returnStdout:
true
,
script:
'getent group video | cut -d: -f3'
)
def
render_id
=
sh
(
returnStdout:
true
,
script:
'getent group render | cut -d: -f3'
)
dockerOpts
=
dockerOpts
+
" --group-add=${video_id} --group-add=${render_id} "
...
...
@@ -707,7 +738,8 @@ CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCM
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false'''
:
""
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false
0 13 * * * % BUILD_LEGACY_OS=true '''
:
""
pipeline
{
agent
none
...
...
@@ -794,6 +826,10 @@ pipeline {
name:
"NINJA_BUILD_TRACE"
,
defaultValue:
false
,
description:
"Generate a ninja build trace (default: OFF)"
)
booleanParam
(
name:
"BUILD_LEGACY_OS"
,
defaultValue:
false
,
description:
"Try building CK with legacy OS dockers: RHEL8 and SLES15 (default: OFF)"
)
}
environment
{
dbuser
=
"${dbuser}"
...
...
@@ -946,7 +982,6 @@ pipeline {
{
parallel
{
stage
(
"Run CK_TILE_GEMM Tests on gfx90a"
)
{
when
{
...
...
@@ -965,7 +1000,6 @@ pipeline {
buildHipClangJobAndReboot
(
setup_args:
setup_args
,
no_reboot:
true
,
build_type:
'Release'
,
execute_cmd:
execute_args
)
cleanWs
()
}
}
stage
(
"Run CK_TILE_GEMM Tests on gfx942"
)
{
...
...
@@ -988,15 +1022,54 @@ pipeline {
}
}
}
stage
(
"Build CK and run Tests"
)
{
parallel
{
stage
(
"Build CK with RHEL8"
)
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
def
docker_name
=
"${env.CK_DOCKERHUB_PRIVATE}:ck_rhel8_rocm6.3"
setup_args
=
""" -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args
=
" "
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
" "
,
no_reboot:
true
,
build_type:
'Release'
,
docker_name:
docker_name
)
cleanWs
()
}
}
stage
(
"Build CK with SLES15"
)
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
def
docker_name
=
"${env.CK_DOCKERHUB_PRIVATE}:ck_sles15_rocm6.3"
setup_args
=
""" -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args
=
" "
}
steps
{
Build_CK_and_Reboot
(
setup_args:
setup_args
,
config_targets:
" "
,
no_reboot:
true
,
build_type:
'Release'
,
docker_name:
docker_name
)
cleanWs
()
}
}
stage
(
"Build CK for all gfx9 targets"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
}
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
...
...
@@ -1018,7 +1091,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
}
expression
{
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx942"
)
}
environment
{
...
...
@@ -1038,7 +1111,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
...
...
@@ -1058,7 +1131,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
}
expression
{
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx90a"
)
}
environment
{
...
...
@@ -1077,7 +1150,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx1030"
)
}
environment
{
...
...
@@ -1097,7 +1170,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx1101"
)
}
environment
{
...
...
@@ -1117,7 +1190,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
BUILD_GFX12
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
}
expression
{
params
.
BUILD_GFX12
.
toBoolean
()
&&
!
params
.
RUN_FULL_QA
.
toBoolean
()
&&
!
params
.
BUILD_INSTANCES_ONLY
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
rocmnode
(
"gfx1201"
)
}
environment
{
...
...
@@ -1144,7 +1217,7 @@ pipeline {
{
when
{
beforeAgent
true
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
}
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
options
{
retry
(
1
)
}
agent
{
label
rocmnode
(
"gfx90a"
)}
...
...
@@ -1165,7 +1238,7 @@ pipeline {
stage
(
"Process results"
){
when
{
beforeAgent
true
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
}
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
agent
{
label
'mici'
}
steps
{
...
...
library/src/tensor_operation_instance/gpu/CMakeLists.txt
View file @
75b09986
...
...
@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME)
set
(
FMHA_FWD_FAST_EXP2 true
)
endif
()
if
(
FMHA_FWD_FAST_EXP2
)
list
(
APPEND
EXAMPLE_FMHA_FWD
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero
)
list
(
APPEND
FMHA
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero
)
else
()
list
(
APPEND
EXAMPLE_FMHA_FWD
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0
)
list
(
APPEND
FMHA
_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0
)
endif
()
list
(
APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal
)
target_compile_options
(
device_mha_instance PRIVATE
${
EXAMPLE_FMHA_FWD_COMPILE_OPTIONS
}
)
list
(
APPEND FMHA_COMPILE_OPTIONS -Wno-float-equal
)
list
(
APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_SPLITKV_API=1
)
list
(
APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_APPENDKV_API=1
)
target_compile_options
(
device_mha_instance PRIVATE
${
FMHA_COMPILE_OPTIONS
}
)
endif
()
target_compile_features
(
${
INSTANCE_NAME
}
PUBLIC
)
...
...
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
View file @
75b09986
...
...
@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
file
(
REMOVE
${
FMHA_CPP_FOLDER
}
/blob_list.txt
)
endif
()
set
(
FMHA_KNOWN_APIS
"fwd,fwd_splitkv,fwd_appendkv,bwd"
)
# generate a list of kernels, but not actually emit files at config stage
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
execute_process
(
COMMAND
${
PYTHON_EXECUTABLE
}
${
CMAKE_SOURCE_DIR
}
/example/ck_tile/01_fmha
/generate.py
COMMAND
${
PYTHON_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
--list_blobs
${
FMHA_CPP_FOLDER
}
/blob_list.txt
--api
${
FMHA_KNOWN_APIS
}
--receipt 3
RESULT_VARIABLE ret
)
if
(
ret AND NOT ret EQUAL 0
)
message
(
FATAL_ERROR
"CK Tile MHA FAILED to genrate a list of kernels via Python."
)
else
()
file
(
STRINGS
${
FMHA_CPP_FOLDER
}
/blob_list.txt FMHA_
FWD_
GEN_BLOBS
)
file
(
STRINGS
${
FMHA_CPP_FOLDER
}
/blob_list.txt FMHA_GEN_BLOBS
)
endif
()
# actually generate the kernel content now
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
add_custom_command
(
OUTPUT
${
FMHA_
FWD_
GEN_BLOBS
}
COMMAND
${
PYTHON_EXECUTABLE
}
${
CMAKE_SOURCE_DIR
}
/example/ck_tile/01_fmha
/generate.py
OUTPUT
${
FMHA_GEN_BLOBS
}
COMMAND
${
PYTHON_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
--output_dir
${
FMHA_CPP_FOLDER
}
--api
${
FMHA_KNOWN_APIS
}
--receipt 3
COMMENT
"Generating mha kernel (cpp) files now ..."
VERBATIM
)
...
...
@@ -57,12 +67,12 @@ add_custom_command(
# have filename. Since, it was cauing the cmake
# to throw "File name too long"
set
(
device_files
)
foreach
(
filepath IN LISTS FMHA_
FWD_
GEN_BLOBS
)
foreach
(
filepath IN LISTS FMHA_GEN_BLOBS
)
get_filename_component
(
filename
${
filepath
}
NAME
)
# Append the filename to the device_files list
list
(
APPEND device_files
${
filename
}
)
endforeach
()
add_custom_target
(
generate_cpp_files DEPENDS
${
FMHA_
FWD_
GEN_BLOBS
}
)
add_custom_target
(
generate_cpp_files DEPENDS
${
FMHA_GEN_BLOBS
}
)
add_instance_library
(
device_mha_instance
${
device_files
}
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment