Unverified Commit 75b09986 authored by Po Yen Chen's avatar Po Yen Chen Committed by GitHub
Browse files

Merge branch 'develop' into ck_tile/fav3_fwd_sept

parents f0ea8b9e 3528a523
...@@ -100,7 +100,15 @@ def getDockerImage(Map conf=[:]){ ...@@ -100,7 +100,15 @@ def getDockerImage(Map conf=[:]){
dockerArgs = dockerArgs + " --no-cache " dockerArgs = dockerArgs + " --no-cache "
} }
echo "Docker Args: ${dockerArgs}" echo "Docker Args: ${dockerArgs}"
def image = getDockerImageName() def image
if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){
image = conf.get("docker_name", "")
echo "Using legacy docker: ${image}"
}
else{
image = getDockerImageName()
echo "Using default docker: ${image}"
}
//Check if image exists //Check if image exists
def retimage def retimage
try try
...@@ -125,7 +133,9 @@ def buildDocker(install_prefix){ ...@@ -125,7 +133,9 @@ def buildDocker(install_prefix){
def image_name = getDockerImageName() def image_name = getDockerImageName()
echo "Building Docker for ${image_name}" echo "Building Docker for ${image_name}"
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' --build-arg DISABLE_CACHE='git rev-parse ${params.COMPILER_VERSION}' " def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' --build-arg DISABLE_CACHE='git rev-parse ${params.COMPILER_VERSION}' "
if(params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){
dockerArgs = dockerArgs + " --no-cache "
}
echo "Build Args: ${dockerArgs}" echo "Build Args: ${dockerArgs}"
try{ try{
if(params.BUILD_DOCKER){ if(params.BUILD_DOCKER){
...@@ -259,6 +269,7 @@ def cmake_build(Map conf=[:]){ ...@@ -259,6 +269,7 @@ def cmake_build(Map conf=[:]){
""") """)
sh cmd3 sh cmd3
} }
// reduce parallelism when compiling, clang uses too much memory // reduce parallelism when compiling, clang uses too much memory
def nt = nthreads() def nt = nthreads()
def cmd def cmd
...@@ -273,7 +284,7 @@ def cmake_build(Map conf=[:]){ ...@@ -273,7 +284,7 @@ def cmake_build(Map conf=[:]){
} }
else{ else{
setup_cmd = conf.get("setup_cmd", "${cmake_envs} cmake ${setup_args} .. ") setup_cmd = conf.get("setup_cmd", "${cmake_envs} cmake ${setup_args} .. ")
build_cmd = conf.get("build_cmd", "${build_envs} dumb-init make -j${nt} ${config_targets}") build_cmd = conf.get("build_cmd", "${build_envs} make -j${nt} ${config_targets}")
} }
cmd = conf.get("cmd", """ cmd = conf.get("cmd", """
${setup_cmd} ${setup_cmd}
...@@ -292,8 +303,8 @@ def cmake_build(Map conf=[:]){ ...@@ -292,8 +303,8 @@ def cmake_build(Map conf=[:]){
dir("build"){ dir("build"){
//build CK //build CK
sh cmd sh cmd
//run tests //run tests except when NO_CK_BUILD or BUILD_LEGACY_OS are set
if(!setup_args.contains("NO_CK_BUILD")){ if(!setup_args.contains("NO_CK_BUILD") && !params.BUILD_LEGACY_OS){
if (setup_args.contains("gfx90a") && params.NINJA_BUILD_TRACE){ if (setup_args.contains("gfx90a") && params.NINJA_BUILD_TRACE){
sh "/ninjatracing/ninjatracing .ninja_log > ck_build_trace.json" sh "/ninjatracing/ninjatracing .ninja_log > ck_build_trace.json"
archiveArtifacts "ck_build_trace.json" archiveArtifacts "ck_build_trace.json"
...@@ -330,7 +341,15 @@ def buildHipClangJob(Map conf=[:]){ ...@@ -330,7 +341,15 @@ def buildHipClangJob(Map conf=[:]){
env.HSA_ENABLE_SDMA=0 env.HSA_ENABLE_SDMA=0
checkout scm checkout scm
def image = getDockerImageName() def image
if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){
image = conf.get("docker_name", "")
echo "Using legacy docker: ${image}"
}
else{
image = getDockerImageName()
echo "Using default docker: ${image}"
}
def prefixpath = conf.get("prefixpath", "/opt/rocm") def prefixpath = conf.get("prefixpath", "/opt/rocm")
// Jenkins is complaining about the render group // Jenkins is complaining about the render group
...@@ -512,7 +531,16 @@ def Build_CK(Map conf=[:]){ ...@@ -512,7 +531,16 @@ def Build_CK(Map conf=[:]){
env.DOCKER_BUILDKIT=1 env.DOCKER_BUILDKIT=1
checkout scm checkout scm
def image = getDockerImageName() def image
if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){
image = conf.get("docker_name", "")
echo "Using legacy docker: ${image}"
}
else{
image = getDockerImageName()
echo "Using default docker: ${image}"
}
def prefixpath = conf.get("prefixpath", "/opt/rocm") def prefixpath = conf.get("prefixpath", "/opt/rocm")
// Jenkins is complaining about the render group // Jenkins is complaining about the render group
...@@ -524,6 +552,9 @@ def Build_CK(Map conf=[:]){ ...@@ -524,6 +552,9 @@ def Build_CK(Map conf=[:]){
if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){ if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){
dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' " dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
} }
if(params.BUILD_LEGACY_OS){
dockerOpts = dockerOpts + " --env LD_LIBRARY_PATH='/opt/Python-3.8.13/lib' "
}
def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3') def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3')
def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3') def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3')
dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} " dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} "
...@@ -707,7 +738,8 @@ CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCM ...@@ -707,7 +738,8 @@ CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCM
0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true 0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true 0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true 0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false;NINJA_BUILD_TRACE=true
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false''' : "" 0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false
0 13 * * * % BUILD_LEGACY_OS=true ''' : ""
pipeline { pipeline {
agent none agent none
...@@ -794,6 +826,10 @@ pipeline { ...@@ -794,6 +826,10 @@ pipeline {
name: "NINJA_BUILD_TRACE", name: "NINJA_BUILD_TRACE",
defaultValue: false, defaultValue: false,
description: "Generate a ninja build trace (default: OFF)") description: "Generate a ninja build trace (default: OFF)")
booleanParam(
name: "BUILD_LEGACY_OS",
defaultValue: false,
description: "Try building CK with legacy OS dockers: RHEL8 and SLES15 (default: OFF)")
} }
environment{ environment{
dbuser = "${dbuser}" dbuser = "${dbuser}"
...@@ -946,7 +982,6 @@ pipeline { ...@@ -946,7 +982,6 @@ pipeline {
{ {
parallel parallel
{ {
stage("Run CK_TILE_GEMM Tests on gfx90a") stage("Run CK_TILE_GEMM Tests on gfx90a")
{ {
when { when {
...@@ -965,7 +1000,6 @@ pipeline { ...@@ -965,7 +1000,6 @@ pipeline {
buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
cleanWs() cleanWs()
} }
} }
stage("Run CK_TILE_GEMM Tests on gfx942") stage("Run CK_TILE_GEMM Tests on gfx942")
{ {
...@@ -988,15 +1022,54 @@ pipeline { ...@@ -988,15 +1022,54 @@ pipeline {
} }
} }
} }
stage("Build CK and run Tests") stage("Build CK and run Tests")
{ {
parallel parallel
{ {
// Legacy-OS stage: builds CK using the RHEL8 / ROCm 6.3 docker image.
// Runs only when the BUILD_LEGACY_OS job parameter is true.
stage("Build CK with RHEL8")
{
when {
// beforeAgent: evaluate the condition before an agent is allocated,
// so no node is tied up when the stage is skipped.
beforeAgent true
expression { params.BUILD_LEGACY_OS.toBoolean() }
}
// Node label is produced by the rocmnode() helper (not shown in this view).
agent{ label rocmnode("gfx90a") }
environment{
// Image reference: tag ck_rhel8_rocm6.3 under the repository named by env.CK_DOCKERHUB_PRIVATE.
def docker_name = "${env.CK_DOCKERHUB_PRIVATE}:ck_rhel8_rocm6.3"
// CMake configure arguments: gfx942 GPU target, -O3, and the Python 3.8.13
// interpreter under /opt passed through CK_USE_ALTERNATIVE_PYTHON.
// NOTE(review): the agent label is gfx90a while GPU_TARGETS is gfx942 — presumably
// acceptable because the legacy-OS job only compiles; confirm.
setup_args = """ -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
// Set to a single space; it is not passed to Build_CK_and_Reboot in this stage.
execute_args = " "
}
steps{
// config_targets is a single space, i.e. no explicit build targets are named.
// docker_name is forwarded so the legacy image can be selected instead of the default one
// (presumably via the BUILD_LEGACY_OS / docker_name check in Build_CK — confirm).
Build_CK_and_Reboot(setup_args: setup_args, config_targets: " ", no_reboot:true, build_type: 'Release', docker_name: docker_name)
// Workspace Cleanup plugin step: clear the workspace once the build has finished.
cleanWs()
}
}
// Legacy-OS stage: builds CK using the SLES15 / ROCm 6.3 docker image.
// Runs only when the BUILD_LEGACY_OS job parameter is true.
stage("Build CK with SLES15")
{
when {
// beforeAgent: evaluate the condition before an agent is allocated,
// so no node is tied up when the stage is skipped.
beforeAgent true
expression { params.BUILD_LEGACY_OS.toBoolean() }
}
// Node label is produced by the rocmnode() helper (not shown in this view).
agent{ label rocmnode("gfx90a") }
environment{
// Image reference: tag ck_sles15_rocm6.3 under the repository named by env.CK_DOCKERHUB_PRIVATE.
def docker_name = "${env.CK_DOCKERHUB_PRIVATE}:ck_sles15_rocm6.3"
// CMake configure arguments: gfx942 GPU target, -O3, and the Python 3.8.13
// interpreter under /opt passed through CK_USE_ALTERNATIVE_PYTHON.
// NOTE(review): the agent label is gfx90a while GPU_TARGETS is gfx942 — presumably
// acceptable because the legacy-OS job only compiles; confirm.
setup_args = """ -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
// Set to a single space; it is not passed to Build_CK_and_Reboot in this stage.
execute_args = " "
}
steps{
// config_targets is a single space, i.e. no explicit build targets are named.
// docker_name is forwarded so the legacy image can be selected instead of the default one
// (presumably via the BUILD_LEGACY_OS / docker_name check in Build_CK — confirm).
Build_CK_and_Reboot(setup_args: setup_args, config_targets: " ", no_reboot:true, build_type: 'Release', docker_name: docker_name)
// Workspace Cleanup plugin step: clear the workspace once the build has finished.
cleanWs()
}
}
stage("Build CK for all gfx9 targets") stage("Build CK for all gfx9 targets")
{ {
when { when {
beforeAgent true beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() } expression { params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent{ label rocmnode("gfx90a") } agent{ label rocmnode("gfx90a") }
environment{ environment{
...@@ -1018,7 +1091,7 @@ pipeline { ...@@ -1018,7 +1091,7 @@ pipeline {
{ {
when { when {
beforeAgent true beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() } expression { params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent{ label rocmnode("gfx942") } agent{ label rocmnode("gfx942") }
environment{ environment{
...@@ -1038,7 +1111,7 @@ pipeline { ...@@ -1038,7 +1111,7 @@ pipeline {
{ {
when { when {
beforeAgent true beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent{ label rocmnode("gfx90a") } agent{ label rocmnode("gfx90a") }
environment{ environment{
...@@ -1058,7 +1131,7 @@ pipeline { ...@@ -1058,7 +1131,7 @@ pipeline {
{ {
when { when {
beforeAgent true beforeAgent true
expression { params.BUILD_INSTANCES_ONLY.toBoolean() && !params.RUN_FULL_QA.toBoolean() } expression { params.BUILD_INSTANCES_ONLY.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent{ label rocmnode("gfx90a") } agent{ label rocmnode("gfx90a") }
environment{ environment{
...@@ -1077,7 +1150,7 @@ pipeline { ...@@ -1077,7 +1150,7 @@ pipeline {
{ {
when { when {
beforeAgent true beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent{ label rocmnode("gfx1030") } agent{ label rocmnode("gfx1030") }
environment{ environment{
...@@ -1097,7 +1170,7 @@ pipeline { ...@@ -1097,7 +1170,7 @@ pipeline {
{ {
when { when {
beforeAgent true beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent{ label rocmnode("gfx1101") } agent{ label rocmnode("gfx1101") }
environment{ environment{
...@@ -1117,7 +1190,7 @@ pipeline { ...@@ -1117,7 +1190,7 @@ pipeline {
{ {
when { when {
beforeAgent true beforeAgent true
expression { params.BUILD_GFX12.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() } expression { params.BUILD_GFX12.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent{ label rocmnode("gfx1201") } agent{ label rocmnode("gfx1201") }
environment{ environment{
...@@ -1144,7 +1217,7 @@ pipeline { ...@@ -1144,7 +1217,7 @@ pipeline {
{ {
when { when {
beforeAgent true beforeAgent true
expression { params.RUN_PERFORMANCE_TESTS.toBoolean() } expression { params.RUN_PERFORMANCE_TESTS.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
options { retry(1) } options { retry(1) }
agent{ label rocmnode("gfx90a")} agent{ label rocmnode("gfx90a")}
...@@ -1165,7 +1238,7 @@ pipeline { ...@@ -1165,7 +1238,7 @@ pipeline {
stage("Process results"){ stage("Process results"){
when { when {
beforeAgent true beforeAgent true
expression { params.RUN_PERFORMANCE_TESTS.toBoolean() } expression { params.RUN_PERFORMANCE_TESTS.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
} }
agent { label 'mici' } agent { label 'mici' }
steps{ steps{
......
...@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME) ...@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME)
set(FMHA_FWD_FAST_EXP2 true) set(FMHA_FWD_FAST_EXP2 true)
endif() endif()
if(FMHA_FWD_FAST_EXP2) if(FMHA_FWD_FAST_EXP2)
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero) list(APPEND FMHA_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
else() else()
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0) list(APPEND FMHA_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
endif() endif()
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal) list(APPEND FMHA_COMPILE_OPTIONS -Wno-float-equal)
target_compile_options(device_mha_instance PRIVATE ${EXAMPLE_FMHA_FWD_COMPILE_OPTIONS}) list(APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_SPLITKV_API=1)
list(APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_APPENDKV_API=1)
target_compile_options(device_mha_instance PRIVATE ${FMHA_COMPILE_OPTIONS})
endif() endif()
target_compile_features(${INSTANCE_NAME} PUBLIC) target_compile_features(${INSTANCE_NAME} PUBLIC)
......
...@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt) ...@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt) file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt)
endif() endif()
set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
# generate a list of kernels, but not actually emit files at config stage # generate a list of kernels, but not actually emit files at config stage
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
execute_process( execute_process(
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
--list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt --list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
--api ${FMHA_KNOWN_APIS}
--receipt 3
RESULT_VARIABLE ret RESULT_VARIABLE ret
) )
if(ret AND NOT ret EQUAL 0) if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile MHA FAILED to genrate a list of kernels via Python.") message( FATAL_ERROR "CK Tile MHA FAILED to genrate a list of kernels via Python.")
else() else()
file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_FWD_GEN_BLOBS) file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_GEN_BLOBS)
endif() endif()
# actually generate the kernel content now # actually generate the kernel content now
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
add_custom_command( add_custom_command(
OUTPUT ${FMHA_FWD_GEN_BLOBS} OUTPUT ${FMHA_GEN_BLOBS}
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
--output_dir ${FMHA_CPP_FOLDER} --output_dir ${FMHA_CPP_FOLDER}
--api ${FMHA_KNOWN_APIS}
--receipt 3
COMMENT "Generating mha kernel (cpp) files now ..." COMMENT "Generating mha kernel (cpp) files now ..."
VERBATIM VERBATIM
) )
...@@ -57,12 +67,12 @@ add_custom_command( ...@@ -57,12 +67,12 @@ add_custom_command(
# have filename. Since it was causing the cmake # have filename. Since it was causing the cmake
# to throw "File name too long" # to throw "File name too long"
set(device_files) set(device_files)
foreach(filepath IN LISTS FMHA_FWD_GEN_BLOBS) foreach(filepath IN LISTS FMHA_GEN_BLOBS)
get_filename_component(filename ${filepath} NAME) get_filename_component(filename ${filepath} NAME)
# Append the filename to the device_files list # Append the filename to the device_files list
list(APPEND device_files ${filename}) list(APPEND device_files ${filename})
endforeach() endforeach()
add_custom_target(generate_cpp_files DEPENDS ${FMHA_FWD_GEN_BLOBS}) add_custom_target(generate_cpp_files DEPENDS ${FMHA_GEN_BLOBS})
add_instance_library(device_mha_instance ${device_files}) add_instance_library(device_mha_instance ${device_files})
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment