Commit 0eb75e21 authored by carlushuang's avatar carlushuang
Browse files

Merge remote-tracking branch 'origin/develop' into ck_tile/moe

parents 1b4b640b c8b6b642
...@@ -62,8 +62,14 @@ if (DTYPES) ...@@ -62,8 +62,14 @@ if (DTYPES)
endif() endif()
message("DTYPES macro set to ${DTYPES}") message("DTYPES macro set to ${DTYPES}")
else() else()
add_definitions(-DCK_ENABLE_INT8 -DCK_ENABLE_FP8 -DCK_ENABLE_BF8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16) add_definitions(-DCK_ENABLE_INT8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16 -DCK_ENABLE_FP8 -DCK_ENABLE_BF8)
set(CK_ENABLE_ALL_DTYPES "ON") set(CK_ENABLE_INT8 "ON")
set(CK_ENABLE_FP16 "ON")
set(CK_ENABLE_FP32 "ON")
set(CK_ENABLE_FP64 "ON")
set(CK_ENABLE_BF16 "ON")
set(CK_ENABLE_FP8 "ON")
set(CK_ENABLE_BF8 "ON")
endif() endif()
#for f8/bf8_t type #for f8/bf8_t type
...@@ -106,21 +112,33 @@ list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/ll ...@@ -106,21 +112,33 @@ list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/ll
message("GPU_TARGETS= ${GPU_TARGETS}") message("GPU_TARGETS= ${GPU_TARGETS}")
find_package(hip)
# No assumption that HIP kernels are launched with uniform block size for backward compatibility
# SWDEV-413293 and https://reviews.llvm.org/D155213
math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR}) * 100000 + ${hip_VERSION_PATCH}")
message("hip_version_flat=${hip_VERSION_FLAT}")
message("checking which targets are supported") message("checking which targets are supported")
#This is the list of targets to be used in case GPU_TARGETS is not set on command line #This is the list of targets to be used in case GPU_TARGETS is not set on command line
#These targets will be filtered and only supported ones will be used #These targets will be filtered and only supported ones will be used
#Setting GPU_TARGETS on command line will override this list #Setting GPU_TARGETS on command line will override this list
if(NOT PROFILER_ONLY) if(NOT PROFILER_ONLY)
if(NOT ENABLE_ASAN_PACKAGING) if(NOT ENABLE_ASAN_PACKAGING)
#build CK for all supported targets #build CK for all supported targets
rocm_check_target_ids(DEFAULT_GPU_TARGETS if(NOT WIN32 AND ${hip_VERSION_FLAT} LESS 600300000)
TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201") # WORKAROUND: compiler does not yet fully support gfx12 targets, need to fix version above
else() rocm_check_target_ids(DEFAULT_GPU_TARGETS
#build CK only for xnack-supported targets TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102")
rocm_check_target_ids(DEFAULT_GPU_TARGETS else()
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+") rocm_check_target_ids(DEFAULT_GPU_TARGETS
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING " " FORCE) TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201")
endif() endif()
else()
#build CK only for xnack-supported targets
rocm_check_target_ids(DEFAULT_GPU_TARGETS
TARGETS "gfx908:xnack+;gfx90a:xnack+;gfx940:xnack+;gfx941:xnack+;gfx942:xnack+")
set(GPU_TARGETS "${DEFAULT_GPU_TARGETS}" CACHE STRING " " FORCE)
endif()
else() else()
add_definitions(-DPROFILER_ONLY) add_definitions(-DPROFILER_ONLY)
set(GPU_TARGETS "" CACHE STRING "" FORCE) set(GPU_TARGETS "" CACHE STRING "" FORCE)
...@@ -169,20 +187,23 @@ endif() ...@@ -169,20 +187,23 @@ endif()
# CK config file to record supported datatypes, etc. # CK config file to record supported datatypes, etc.
configure_file(include/ck/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/ck/config.h) configure_file(include/ck/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/ck/config.h)
find_package(hip)
# No assumption that HIP kernels are launched with uniform block size for backward compatibility
# SWDEV-413293 and https://reviews.llvm.org/D155213
math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR}) * 100000 + ${hip_VERSION_PATCH}")
message("hip_version_flat=${hip_VERSION_FLAT}")
if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 500723302) if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 500723302)
message("Adding the fno-offload-uniform-block compiler flag") check_cxx_compiler_flag("-fno-offload-uniform-block" HAS_NO_OFFLOAD_UNIFORM_BLOCK)
add_compile_options(-fno-offload-uniform-block) if(HAS_NO_OFFLOAD_UNIFORM_BLOCK)
message("Adding the fno-offload-uniform-block compiler flag")
add_compile_options(-fno-offload-uniform-block)
endif()
endif() endif()
if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600140090) if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600140090)
message("Adding the enable-post-misched=0 compiler flag") check_cxx_compiler_flag("-mllvm -enable-post-misched=0" HAS_ENABLE_POST_MISCHED)
add_compile_options("SHELL: -mllvm -enable-post-misched=0") if(HAS_ENABLE_POST_MISCHED)
message("Adding the enable-post-misched=0 compiler flag")
add_compile_options("SHELL: -mllvm -enable-post-misched=0")
endif()
endif() endif()
if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 600241132 AND ${hip_VERSION_FLAT} LESS 600300000) set(check-coerce)
check_cxx_compiler_flag(" -mllvm -amdgpu-coerce-illegal-types=1" check-coerce)
if(NOT WIN32 AND check-coerce AND ${hip_VERSION_FLAT} GREATER 600241132 AND ${hip_VERSION_FLAT} LESS 600300000)
message("Adding the amdgpu-coerce-illegal-types=1") message("Adding the amdgpu-coerce-illegal-types=1")
add_compile_options("SHELL: -mllvm -amdgpu-coerce-illegal-types=1") add_compile_options("SHELL: -mllvm -amdgpu-coerce-illegal-types=1")
endif() endif()
...@@ -532,6 +553,9 @@ if(NOT DEFINED INSTANCES_ONLY) ...@@ -532,6 +553,9 @@ if(NOT DEFINED INSTANCES_ONLY)
PACKAGE_NAME examples PACKAGE_NAME examples
) )
add_subdirectory(example) add_subdirectory(example)
if(GPU_TARGETS MATCHES "gfx9" AND NOT INSTANCES_ONLY)
add_subdirectory(codegen)
endif()
if(BUILD_TESTING) if(BUILD_TESTING)
add_subdirectory(test) add_subdirectory(test)
endif() endif()
......
FROM ubuntu:20.04 FROM ubuntu:20.04
ARG DEBIAN_FRONTEND=noninteractive ARG DEBIAN_FRONTEND=noninteractive
ARG ROCMVERSION=6.1 ARG ROCMVERSION=6.2
ARG compiler_version="" ARG compiler_version=""
ARG compiler_commit="" ARG compiler_commit=""
ARG CK_SCCACHE="" ARG CK_SCCACHE=""
...@@ -17,17 +17,12 @@ RUN apt-get install -y --allow-unauthenticated apt-utils wget gnupg2 curl ...@@ -17,17 +17,12 @@ RUN apt-get install -y --allow-unauthenticated apt-utils wget gnupg2 curl
ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn ENV APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=DontWarn
RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg RUN curl -fsSL https://repo.radeon.com/rocm/rocm.gpg.key | gpg --dearmor -o /etc/apt/trusted.gpg.d/rocm-keyring.gpg
RUN if [ "$ROCMVERSION" != "6.2" ]; then \ RUN if [ "$ROCMVERSION" != "6.3" ]; then \
sh -c "wget https://repo.radeon.com/amdgpu-install/6.1/ubuntu/focal/amdgpu-install_6.1.60100-1_all.deb --no-check-certificate" && \ sh -c "wget https://repo.radeon.com/amdgpu-install/$ROCMVERSION/ubuntu/focal/amdgpu-install_6.2.60200-1_all.deb --no-check-certificate" && \
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ./amdgpu-install_6.1.60100-1_all.deb && \ apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ./amdgpu-install_6.2.60200-1_all.deb && \
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
sh -c "echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] $DEB_ROCM_REPO focal main > /etc/apt/sources.list.d/rocm.list" && \ sh -c "echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] $DEB_ROCM_REPO focal main > /etc/apt/sources.list.d/rocm.list" && \
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCMVERSION/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list'; \ sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCMVERSION/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list'; \
elif [ "$ROCMVERSION" = "6.2" ] && [ "$compiler_version" = "rc4" ]; then \
sh -c "wget http://artifactory-cdn.amd.com/artifactory/list/amdgpu-deb/amdgpu-install-internal_6.2-20.04-1_all.deb --no-check-certificate" && \
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install dialog libpopt0 rsync && DEBIAN_FRONTEND=noninteractive apt-get install ./amdgpu-install-internal_6.2-20.04-1_all.deb && \
sh -c 'echo deb [arch=amd64 trusted=yes] http://compute-artifactory.amd.com/artifactory/list/rocm-release-archive-20.04-deb/ 6.2 rel-63 > /etc/apt/sources.list.d/rocm-build.list' && \
amdgpu-repo --amdgpu-build=2009461; \
fi fi
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
...@@ -64,6 +59,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow- ...@@ -64,6 +59,7 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
python3-dev \ python3-dev \
python3-pip \ python3-pip \
redis \ redis \
rocm-llvm-dev \
sshpass \ sshpass \
stunnel \ stunnel \
software-properties-common \ software-properties-common \
......
...@@ -38,7 +38,7 @@ def getDockerImageName(){ ...@@ -38,7 +38,7 @@ def getDockerImageName(){
img = "${params.USE_CUSTOM_DOCKER}" img = "${params.USE_CUSTOM_DOCKER}"
} }
else{ else{
if (params.ROCMVERSION != "6.2"){ if (params.ROCMVERSION != "6.3"){
if (params.COMPILER_VERSION == "") { if (params.COMPILER_VERSION == "") {
img = "${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}" img = "${env.CK_DOCKERHUB}:ck_ub20.04_rocm${params.ROCMVERSION}"
} }
...@@ -204,6 +204,9 @@ def cmake_build(Map conf=[:]){ ...@@ -204,6 +204,9 @@ def cmake_build(Map conf=[:]){
cd build cd build
""" """
def invocation_tag="" def invocation_tag=""
if (setup_args.contains("gfx12")){
invocation_tag="gfx12"
}
if (setup_args.contains("gfx11")){ if (setup_args.contains("gfx11")){
invocation_tag="gfx11" invocation_tag="gfx11"
} }
...@@ -285,6 +288,19 @@ def cmake_build(Map conf=[:]){ ...@@ -285,6 +288,19 @@ def cmake_build(Map conf=[:]){
if (package_build == true && (env.BRANCH_NAME == "develop" || env.BRANCH_NAME == "amd-master")) { if (package_build == true && (env.BRANCH_NAME == "develop" || env.BRANCH_NAME == "amd-master")) {
archiveArtifacts artifacts: "build/*.deb", allowEmptyArchive: true, fingerprint: true archiveArtifacts artifacts: "build/*.deb", allowEmptyArchive: true, fingerprint: true
} }
if (params.RUN_CK_TILE_TESTS){
try{
archiveArtifacts "perf_fmha_fwd_*.log"
archiveArtifacts "perf_fmha_bwd_*.log"
stash name: "perf_fmha_fwd_gfx942.log"
stash name: "perf_fmha_bwd_gfx942.log"
stash name: "perf_fmha_fwd_gfx90a.log"
stash name: "perf_fmha_bwd_gfx90a.log"
}
catch(Exception err){
echo "could not locate the requested artifacts: ${err.getMessage()}. will skip the stashing."
}
}
} }
def buildHipClangJob(Map conf=[:]){ def buildHipClangJob(Map conf=[:]){
...@@ -297,7 +313,7 @@ def buildHipClangJob(Map conf=[:]){ ...@@ -297,7 +313,7 @@ def buildHipClangJob(Map conf=[:]){
def prefixpath = conf.get("prefixpath", "/opt/rocm") def prefixpath = conf.get("prefixpath", "/opt/rocm")
// Jenkins is complaining about the render group // Jenkins is complaining about the render group
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" def dockerOpts="--rm --device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if (conf.get("enforce_xnack_on", false)) { if (conf.get("enforce_xnack_on", false)) {
dockerOpts = dockerOpts + " --env HSA_XNACK=1 " dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
} }
...@@ -356,7 +372,7 @@ def runCKProfiler(Map conf=[:]){ ...@@ -356,7 +372,7 @@ def runCKProfiler(Map conf=[:]){
def prefixpath = conf.get("prefixpath", "/opt/rocm") def prefixpath = conf.get("prefixpath", "/opt/rocm")
// Jenkins is complaining about the render group // Jenkins is complaining about the render group
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" def dockerOpts="--rm --device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if (conf.get("enforce_xnack_on", false)) { if (conf.get("enforce_xnack_on", false)) {
dockerOpts = dockerOpts + " --env HSA_XNACK=1 " dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
} }
...@@ -410,8 +426,9 @@ def runCKProfiler(Map conf=[:]){ ...@@ -410,8 +426,9 @@ def runCKProfiler(Map conf=[:]){
archiveArtifacts "perf_resnet50_N4.log" archiveArtifacts "perf_resnet50_N4.log"
archiveArtifacts "perf_batched_gemm.log" archiveArtifacts "perf_batched_gemm.log"
archiveArtifacts "perf_grouped_gemm.log" archiveArtifacts "perf_grouped_gemm.log"
archiveArtifacts "perf_conv_fwd.log" archiveArtifacts "perf_grouped_conv_fwd.log"
archiveArtifacts "perf_conv_bwd_data.log" archiveArtifacts "perf_grouped_conv_bwd_data.log"
archiveArtifacts "perf_grouped_conv_bwd_weight.log"
archiveArtifacts "perf_gemm_bilinear.log" archiveArtifacts "perf_gemm_bilinear.log"
archiveArtifacts "perf_reduction.log" archiveArtifacts "perf_reduction.log"
archiveArtifacts "perf_splitK_gemm.log" archiveArtifacts "perf_splitK_gemm.log"
...@@ -423,8 +440,9 @@ def runCKProfiler(Map conf=[:]){ ...@@ -423,8 +440,9 @@ def runCKProfiler(Map conf=[:]){
stash name: "perf_resnet50_N4.log" stash name: "perf_resnet50_N4.log"
stash name: "perf_batched_gemm.log" stash name: "perf_batched_gemm.log"
stash name: "perf_grouped_gemm.log" stash name: "perf_grouped_gemm.log"
stash name: "perf_conv_fwd.log" stash name: "perf_grouped_conv_fwd.log"
stash name: "perf_conv_bwd_data.log" stash name: "perf_grouped_conv_bwd_data.log"
stash name: "perf_grouped_conv_bwd_weight.log"
stash name: "perf_gemm_bilinear.log" stash name: "perf_gemm_bilinear.log"
stash name: "perf_reduction.log" stash name: "perf_reduction.log"
stash name: "perf_splitK_gemm.log" stash name: "perf_splitK_gemm.log"
...@@ -477,7 +495,7 @@ def Build_CK(Map conf=[:]){ ...@@ -477,7 +495,7 @@ def Build_CK(Map conf=[:]){
def prefixpath = conf.get("prefixpath", "/opt/rocm") def prefixpath = conf.get("prefixpath", "/opt/rocm")
// Jenkins is complaining about the render group // Jenkins is complaining about the render group
def dockerOpts="--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined" def dockerOpts="--rm --device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if (conf.get("enforce_xnack_on", false)) { if (conf.get("enforce_xnack_on", false)) {
dockerOpts = dockerOpts + " --env HSA_XNACK=1 " dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
} }
...@@ -518,7 +536,7 @@ def Build_CK(Map conf=[:]){ ...@@ -518,7 +536,7 @@ def Build_CK(Map conf=[:]){
//check whether to run performance tests on this node //check whether to run performance tests on this node
def do_perf_tests = 0 def do_perf_tests = 0
sh 'rocminfo | tee rocminfo.log' sh 'rocminfo | tee rocminfo.log'
if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') || runShell('grep -n "gfx942" rocminfo.log') ){ if ( runShell('grep -n "gfx1030" rocminfo.log') || runShell('grep -n "gfx1101" rocminfo.log') || runShell('grep -n "gfx1201" rocminfo.log') || runShell('grep -n "gfx942" rocminfo.log') ){
do_perf_tests = 1 do_perf_tests = 1
echo "Stash profiler and run performance tests" echo "Stash profiler and run performance tests"
} }
...@@ -590,7 +608,7 @@ def process_results(Map conf=[:]){ ...@@ -590,7 +608,7 @@ def process_results(Map conf=[:]){
def prefixpath = "/opt/rocm" def prefixpath = "/opt/rocm"
// Jenkins is complaining about the render group // Jenkins is complaining about the render group
def dockerOpts="--cap-add=SYS_PTRACE --security-opt seccomp=unconfined" def dockerOpts="--rm --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if (conf.get("enforce_xnack_on", false)) { if (conf.get("enforce_xnack_on", false)) {
dockerOpts = dockerOpts + " --env HSA_XNACK=1 " dockerOpts = dockerOpts + " --env HSA_XNACK=1 "
} }
...@@ -612,6 +630,17 @@ def process_results(Map conf=[:]){ ...@@ -612,6 +630,17 @@ def process_results(Map conf=[:]){
timeout(time: 1, unit: 'HOURS'){ timeout(time: 1, unit: 'HOURS'){
try{ try{
dir("script"){ dir("script"){
if (params.RUN_CK_TILE_TESTS){
try{
unstash "perf_fmha_fwd_gfx942.log"
unstash "perf_fmha_bwd_gfx942.log"
unstash "perf_fmha_fwd_gfx90a.log"
unstash "perf_fmha_bwd_gfx90a.log"
}
catch(Exception err){
echo "could not locate the FMHA performance logs: ${err.getMessage()}."
}
}
if (params.RUN_FULL_QA){ if (params.RUN_FULL_QA){
// unstash perf files to master // unstash perf files to master
unstash "ckprofiler_0.2.0_amd64.deb" unstash "ckprofiler_0.2.0_amd64.deb"
...@@ -621,8 +650,9 @@ def process_results(Map conf=[:]){ ...@@ -621,8 +650,9 @@ def process_results(Map conf=[:]){
unstash "perf_resnet50_N4.log" unstash "perf_resnet50_N4.log"
unstash "perf_batched_gemm.log" unstash "perf_batched_gemm.log"
unstash "perf_grouped_gemm.log" unstash "perf_grouped_gemm.log"
unstash "perf_conv_fwd.log" unstash "perf_grouped_conv_fwd.log"
unstash "perf_conv_bwd_data.log" unstash "perf_grouped_conv_bwd_data.log"
unstash "perf_grouped_conv_bwd_weight.log"
unstash "perf_gemm_bilinear.log" unstash "perf_gemm_bilinear.log"
unstash "perf_reduction.log" unstash "perf_reduction.log"
unstash "perf_splitK_gemm.log" unstash "perf_splitK_gemm.log"
...@@ -652,10 +682,10 @@ def process_results(Map conf=[:]){ ...@@ -652,10 +682,10 @@ def process_results(Map conf=[:]){
} }
//launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version //launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.1; RUN_CK_TILE_TESTS=true CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.2; RUN_CK_TILE_TESTS=true
0 21 * * * % ROCMVERSION=6.1;hipTensor_test=true 0 21 * * * % ROCMVERSION=6.2;hipTensor_test=true
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false 0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;USE_SCCACHE=false 0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;BUILD_COMPILER=/llvm-project/build/bin/clang++;BUILD_GFX12=true;USE_SCCACHE=false
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false''' : "" 0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false''' : ""
pipeline { pipeline {
...@@ -677,8 +707,8 @@ pipeline { ...@@ -677,8 +707,8 @@ pipeline {
description: 'If you want to use a custom docker image, please specify it here (default: leave blank).') description: 'If you want to use a custom docker image, please specify it here (default: leave blank).')
string( string(
name: 'ROCMVERSION', name: 'ROCMVERSION',
defaultValue: '6.1', defaultValue: '6.2',
description: 'Specify which ROCM version to use: 6.1 (default).') description: 'Specify which ROCM version to use: 6.2 (default).')
string( string(
name: 'COMPILER_VERSION', name: 'COMPILER_VERSION',
defaultValue: '', defaultValue: '',
...@@ -720,9 +750,9 @@ pipeline { ...@@ -720,9 +750,9 @@ pipeline {
defaultValue: true, defaultValue: true,
description: "Run the performance tests (default: ON)") description: "Run the performance tests (default: ON)")
booleanParam( booleanParam(
name: "RUN_CODEGEN_TESTS", name: "RUN_GROUPED_CONV_LARGE_CASES_TESTS",
defaultValue: true, defaultValue: false,
description: "Run the codegen tests (default: ON)") description: "Run the grouped conv large cases tests (default: OFF)")
booleanParam( booleanParam(
name: "RUN_CK_TILE_TESTS", name: "RUN_CK_TILE_TESTS",
defaultValue: false, defaultValue: false,
...@@ -731,6 +761,11 @@ pipeline { ...@@ -731,6 +761,11 @@ pipeline {
name: "BUILD_INSTANCES_ONLY", name: "BUILD_INSTANCES_ONLY",
defaultValue: false, defaultValue: false,
description: "Test building instances for various architectures simultaneously (default: OFF)") description: "Test building instances for various architectures simultaneously (default: OFF)")
booleanParam(
name: "BUILD_GFX12",
defaultValue: false,
description: "Build CK and run tests on gfx12 (default: OFF)")
} }
environment{ environment{
dbuser = "${dbuser}" dbuser = "${dbuser}"
...@@ -809,25 +844,22 @@ pipeline { ...@@ -809,25 +844,22 @@ pipeline {
} }
} }
} }
stage("Run Codegen Tests") stage("Run Grouped Conv Large Case Tests")
{ {
parallel parallel
{ {
stage("Run Codegen Tests on gfx90a") stage("Run Grouped Conv Large Case Tests on gfx90a")
{ {
when { when {
beforeAgent true beforeAgent true
expression { params.RUN_CODEGEN_TESTS.toBoolean() } expression { params.RUN_GROUPED_CONV_LARGE_CASES_TESTS.toBoolean() }
} }
agent{ label rocmnode("gfx90a")} agent{ label rocmnode("gfx90a")}
environment{ environment{
setup_args = "NO_CK_BUILD" setup_args = "NO_CK_BUILD"
execute_args = """ cd ../codegen && rm -rf build && mkdir build && cd build && \ execute_args = """ ../script/cmake-ck-dev.sh ../ gfx90a && \
cmake -D CMAKE_PREFIX_PATH=/opt/rocm \ make -j64 test_grouped_convnd_fwd_large_cases_xdl && \
-D CMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ ./bin/test_grouped_convnd_fwd_large_cases_xdl"""
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx90a" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j check"""
} }
steps{ steps{
buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
...@@ -852,8 +884,7 @@ pipeline { ...@@ -852,8 +884,7 @@ pipeline {
execute_args = """ ../script/cmake-ck-dev.sh ../ gfx90a && \ execute_args = """ ../script/cmake-ck-dev.sh ../ gfx90a && \
make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \ make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \
cd ../ && cd ../ &&
example/ck_tile/01_fmha/script/smoke_test_fwd.sh && \ example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx90a """
example/ck_tile/01_fmha/script/smoke_test_bwd.sh"""
} }
steps{ steps{
buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
...@@ -872,8 +903,7 @@ pipeline { ...@@ -872,8 +903,7 @@ pipeline {
execute_args = """ ../script/cmake-ck-dev.sh ../ gfx942 && \ execute_args = """ ../script/cmake-ck-dev.sh ../ gfx942 && \
make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \ make -j64 tile_example_fmha_fwd tile_example_fmha_bwd && \
cd ../ && cd ../ &&
example/ck_tile/01_fmha/script/smoke_test_fwd.sh && \ example/ck_tile/01_fmha/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx942 """
example/ck_tile/01_fmha/script/smoke_test_bwd.sh"""
} }
steps{ steps{
buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
...@@ -1008,6 +1038,26 @@ pipeline { ...@@ -1008,6 +1038,26 @@ pipeline {
cleanWs() cleanWs()
} }
} }
stage("Build CK and run Tests on gfx1201")
{
when {
beforeAgent true
expression { params.BUILD_GFX12.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() }
}
agent{ label rocmnode("gfx1201") }
environment{
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1201" -DDL_KERNELS=ON -DCMAKE_CXX_FLAGS=" -O3 " """
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
-DGPU_TARGETS="gfx1201" \
-DCMAKE_CXX_COMPILER="${build_compiler()}" \
-DCMAKE_CXX_FLAGS=" -O3 " .. && make -j """
}
steps{
Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
cleanWs()
}
}
} }
} }
......
...@@ -5,17 +5,17 @@ if(GPU_TARGETS MATCHES "gfx9") ...@@ -5,17 +5,17 @@ if(GPU_TARGETS MATCHES "gfx9")
add_executable(client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp) add_executable(client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp)
target_link_libraries(client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations)
if((DTYPES MATCHES "fp8") OR NOT DEFINED DTYPES) if((DTYPES MATCHES "fp8") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
add_executable(client_grouped_conv3d_fwd_fp8 grouped_conv3d_fwd_fp8.cpp) add_executable(client_grouped_conv3d_fwd_fp8 grouped_conv3d_fwd_fp8.cpp)
target_link_libraries(client_grouped_conv3d_fwd_fp8 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv3d_fwd_fp8 PRIVATE composable_kernel::device_conv_operations)
endif() endif()
if((DTYPES MATCHES "bf8") OR NOT DEFINED DTYPES) if((DTYPES MATCHES "bf8") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
add_executable(client_grouped_conv3d_fwd_bf8 grouped_conv3d_fwd_bf8.cpp) add_executable(client_grouped_conv3d_fwd_bf8 grouped_conv3d_fwd_bf8.cpp)
target_link_libraries(client_grouped_conv3d_fwd_bf8 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv3d_fwd_bf8 PRIVATE composable_kernel::device_conv_operations)
endif() endif()
if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "bf8") OR NOT DEFINED DTYPES) if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "bf8") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
add_executable(client_grouped_conv3d_fwd_fp8_bf8 grouped_conv3d_fwd_fp8_bf8.cpp) add_executable(client_grouped_conv3d_fwd_fp8_bf8 grouped_conv3d_fwd_fp8_bf8.cpp)
target_link_libraries(client_grouped_conv3d_fwd_fp8_bf8 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv3d_fwd_fp8_bf8 PRIVATE composable_kernel::device_conv_operations)
......
...@@ -4,5 +4,7 @@ target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel:: ...@@ -4,5 +4,7 @@ target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel::
add_executable(client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp) add_executable(client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp)
target_link_libraries(client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_conv_operations)
add_executable(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp) if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "bf8") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
target_link_libraries(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp)
target_link_libraries(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_conv_operations)
endif()
\ No newline at end of file
...@@ -2,10 +2,13 @@ add_executable(client_grouped_conv1d_bwd_weight_fp16 grouped_conv1d_bwd_weight_f ...@@ -2,10 +2,13 @@ add_executable(client_grouped_conv1d_bwd_weight_fp16 grouped_conv1d_bwd_weight_f
add_executable(client_grouped_conv2d_bwd_weight_fp16 grouped_conv2d_bwd_weight_fp16.cpp) add_executable(client_grouped_conv2d_bwd_weight_fp16 grouped_conv2d_bwd_weight_fp16.cpp)
add_executable(client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_fp16.cpp) add_executable(client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_fp16.cpp)
add_executable(client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp) add_executable(client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp)
add_executable(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp)
target_link_libraries(client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations)
target_link_libraries(client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations)
target_link_libraries(client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations)
target_link_libraries(client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_conv_operations)
target_link_libraries(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_conv_operations)
if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "bf8") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
add_executable(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp)
target_link_libraries(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_conv_operations)
endif()
\ No newline at end of file
...@@ -4,7 +4,7 @@ if((DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES) ...@@ -4,7 +4,7 @@ if((DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)
endif() endif()
if((DTYPES MATCHES "fp8") OR NOT DEFINED DTYPES) if((DTYPES MATCHES "fp8") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
add_executable(client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp) add_executable(client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp)
target_link_libraries(client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_conv_operations) target_link_libraries(client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_conv_operations)
endif() endif()
......
if(GPU_TARGETS MATCHES "gfx9" AND ((DTYPES MATCHES "fp8" AND DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)) if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "fp16") OR (NOT DEFINED DTYPES AND GPU_TARGETS MATCHES "gfx94"))
add_executable(client_splitK_gemm splitK_gemm_fp16_f8.cpp) add_executable(client_splitK_gemm splitK_gemm_fp16_f8.cpp)
target_link_libraries(client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations) target_link_libraries(client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations)
endif() endif()
...@@ -34,8 +34,17 @@ if (DTYPES) ...@@ -34,8 +34,17 @@ if (DTYPES)
endif() endif()
message("DTYPES macro set to ${DTYPES}") message("DTYPES macro set to ${DTYPES}")
else() else()
add_definitions(-DCK_ENABLE_INT8 -DCK_ENABLE_FP8 -DCK_ENABLE_BF8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16) add_definitions(-DCK_ENABLE_INT8 -DCK_ENABLE_FP16 -DCK_ENABLE_FP32 -DCK_ENABLE_FP64 -DCK_ENABLE_BF16)
set(CK_ENABLE_ALL_DTYPES "ON") set(CK_ENABLE_INT8 "ON")
set(CK_ENABLE_FP16 "ON")
set(CK_ENABLE_FP32 "ON")
set(CK_ENABLE_FP64 "ON")
set(CK_ENABLE_BF16 "ON")
if (GPU_TARGETS MATCHES "gfx94")
add_definitions(-DCK_ENABLE_FP8 -DCK_ENABLE_BF8)
set(CK_ENABLE_FP8 "ON")
set(CK_ENABLE_BF8 "ON")
endif()
endif() endif()
if (GPU_TARGETS) if (GPU_TARGETS)
......
cmake_minimum_required(VERSION 3.16)
project(composable_kernel_host LANGUAGES CXX HIP)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
...@@ -8,17 +5,9 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) ...@@ -8,17 +5,9 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) set(CK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
find_package(ROCM)
include(ROCMInstallTargets)
include(ROCMTest)
add_compile_options(-std=c++17) add_compile_options(-std=c++17)
find_package(hip) find_package(hip)
## HIP add_custom_target(codegen)
set(CMAKE_HIP_PLATFORM amd)
set(CMAKE_HIP_COMPILER ${CMAKE_CXX_COMPILER})
set(CMAKE_HIP_EXTENSIONS ON)
message("CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}")
# add include directories # add include directories
include_directories(BEFORE include_directories(BEFORE
...@@ -32,8 +21,9 @@ list(APPEND CMAKE_MODULE_PATH ${CK_ROOT}/cmake) ...@@ -32,8 +21,9 @@ list(APPEND CMAKE_MODULE_PATH ${CK_ROOT}/cmake)
include(Embed) include(Embed)
file(GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS file(GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS
${CK_ROOT}/include/ck/*.hpp) ${CK_ROOT}/include/ck/*.hpp)
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}") #printouts fot debug purposes
message(STATUS "RELATIVE: ${CK_ROOT}/include") #message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
#message(STATUS "RELATIVE: ${CK_ROOT}/include")
add_embed_library(ck_headers ${KERNEL_FILES} RELATIVE ${CK_ROOT}/include) add_embed_library(ck_headers ${KERNEL_FILES} RELATIVE ${CK_ROOT}/include)
file(GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp) file(GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp)
......
...@@ -76,8 +76,11 @@ std::string SequenceStr(const std::vector<int>& v); ...@@ -76,8 +76,11 @@ std::string SequenceStr(const std::vector<int>& v);
std::string MakeTuple(const std::vector<std::string>& v); std::string MakeTuple(const std::vector<std::string>& v);
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wglobal-constructors"
template <int... xs> template <int... xs>
const std::string S = SequenceStr({xs...}); const std::string S = SequenceStr({xs...});
#pragma clang diagnostic pop
constexpr const char* PassThrough = "ck::tensor_operation::element_wise::PassThrough"; constexpr const char* PassThrough = "ck::tensor_operation::element_wise::PassThrough";
constexpr const char* Bilinear = "ck::tensor_operation::element_wise::Bilinear"; constexpr const char* Bilinear = "ck::tensor_operation::element_wise::Bilinear";
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
#include "ck/host/device_gemm_multiple_d/operation.hpp" #include "ck/host/device_gemm_multiple_d/operation.hpp"
#include "ck/host/stringutils.hpp" #include "ck/host/stringutils.hpp"
#include "ck/host/types.hpp"
#include "ck/host/utils.hpp" #include "ck/host/utils.hpp"
#include <cassert> #include <cassert>
...@@ -32,11 +33,11 @@ static std::string GetGemmSpec(const std::size_t m, ...@@ -32,11 +33,11 @@ static std::string GetGemmSpec(const std::size_t m,
} }
// function to update prologue/epilogue with user provided operation // function to update prologue/epilogue with user provided operation
void Operation_Xdl_CShuffle::update_prologue(const std::string& prologue) void Operation_Xdl_CShuffle::update_prologue(const std::string& pro)
{ {
if(!prologue.empty()) if(!pro.empty())
{ {
this->prologue = prologue; this->prologue = pro;
this->cde_elem_op = "CDEElementOp"; this->cde_elem_op = "CDEElementOp";
} }
else else
...@@ -45,11 +46,11 @@ void Operation_Xdl_CShuffle::update_prologue(const std::string& prologue) ...@@ -45,11 +46,11 @@ void Operation_Xdl_CShuffle::update_prologue(const std::string& prologue)
} }
} }
void Operation_Xdl_CShuffle::update_epilogue(const std::string& epilogue) void Operation_Xdl_CShuffle::update_epilogue(const std::string& epi)
{ {
if(!epilogue.empty()) if(!epi.empty())
{ {
this->epilogue = epilogue; this->epilogue = epi;
this->cde_elem_op = "CDEElementOp"; this->cde_elem_op = "CDEElementOp";
} }
else else
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "ck/host/device_grouped_conv_fwd_multiple_d/conv_fwd_op.hpp" #include "ck/host/device_grouped_conv_fwd_multiple_d/conv_fwd_op.hpp"
#include <iostream> #include <iostream>
#include "ck/host/stringutils.hpp" #include "ck/host/stringutils.hpp"
#include "ck/host/types.hpp"
#include "ck/host/utils.hpp" #include "ck/host/utils.hpp"
#include <cassert> #include <cassert>
...@@ -11,34 +12,15 @@ namespace ck { ...@@ -11,34 +12,15 @@ namespace ck {
namespace host { namespace host {
namespace conv { namespace conv {
// calculate appropriate Gemm Specification based on input tensor dimensions // NOTE: in CK, MNKPadding is always used for forward convolution, so didn't
// NOTE: in CK, MNKPadding is always used for forward convolution // add GemmSpec function here
static std::string GetGemmSpec(const std::size_t m,
const std::size_t n,
const std::size_t k,
const std::size_t m_per_block,
const std::size_t n_per_block,
const std::size_t k_per_block)
{
std::string spec = "";
if(integer_divide_ceil(m, m_per_block) * m_per_block - m != 0)
spec += "M";
if(integer_divide_ceil(n, n_per_block) * n_per_block - n != 0)
spec += "N";
if(integer_divide_ceil(k, k_per_block) * k_per_block - k != 0)
spec += "K";
if(spec == "")
return "ck::tensor_operation::device::GemmSpecialization::Default";
return "ck::tensor_operation::device::GemmSpecialization::" + spec + "Padding";
}
// function to update prologue/epilogue with user provided operation // function to update prologue/epilogue with user provided operation
void Operation_Conv_Fwd_Xdl_Cshuffle::update_prologue(const std::string& prologue) void Operation_Conv_Fwd_Xdl_Cshuffle::update_prologue(const std::string& pro)
{ {
if(!prologue.empty()) if(!pro.empty())
{ {
this->prologue = prologue; this->prologue = pro;
this->cde_elem_op = "CDEElementOp"; this->cde_elem_op = "CDEElementOp";
} }
else else
...@@ -47,11 +29,11 @@ void Operation_Conv_Fwd_Xdl_Cshuffle::update_prologue(const std::string& prologu ...@@ -47,11 +29,11 @@ void Operation_Conv_Fwd_Xdl_Cshuffle::update_prologue(const std::string& prologu
} }
} }
void Operation_Conv_Fwd_Xdl_Cshuffle::update_epilogue(const std::string& epilogue) void Operation_Conv_Fwd_Xdl_Cshuffle::update_epilogue(const std::string& epi)
{ {
if(!epilogue.empty()) if(!epi.empty())
{ {
this->epilogue = epilogue; this->epilogue = epi;
this->cde_elem_op = "CDEElementOp"; this->cde_elem_op = "CDEElementOp";
} }
else else
......
...@@ -4,7 +4,10 @@ ...@@ -4,7 +4,10 @@
namespace ck { namespace ck {
namespace host { namespace host {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wglobal-constructors"
const std::string config_header = ""; const std::string config_header = "";
#pragma clang diagnostic pop
std::unordered_map<std::string_view, std::string_view> GetHeaders() std::unordered_map<std::string_view, std::string_view> GetHeaders()
{ {
......
...@@ -4,7 +4,9 @@ file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp) ...@@ -4,7 +4,9 @@ file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp)
foreach(TEST_SRC ${TEST_SRCS}) foreach(TEST_SRC ${TEST_SRCS})
set_source_files_properties(${TEST_SRC} PROPERTIES LANGUAGE HIP) set_source_files_properties(${TEST_SRC} PROPERTIES LANGUAGE HIP)
get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE) get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE)
rocm_add_test_executable(test_host_${BASE_NAME} ${TEST_SRC}) add_executable(test_host_${BASE_NAME} ${TEST_SRC})
add_dependencies(codegen test_host_${BASE_NAME})
add_test(NAME codegen_test_${BASE_NAME} COMMAND test_host_${BASE_NAME})
target_link_libraries(test_host_${BASE_NAME} ck_rtc ck_host) target_link_libraries(test_host_${BASE_NAME} ck_rtc ck_host)
# target_link_libraries(test_host_${BASE_NAME} ${CK_ROOT}/build/lib/libutility.a) # target_link_libraries(test_host_${BASE_NAME} ${CK_ROOT}/build/lib/libutility.a)
target_include_directories(test_host_${BASE_NAME} PUBLIC include()) target_include_directories(test_host_${BASE_NAME} PUBLIC include())
......
...@@ -92,7 +92,6 @@ struct Epilogue ...@@ -92,7 +92,6 @@ struct Epilogue
static_cast<int>(prob.C), static_cast<int>(prob.C),
static_cast<int>(prob.Y), static_cast<int>(prob.Y),
static_cast<int>(prob.X)}; static_cast<int>(prob.X)};
ck::Array<ck::index_t, 5> d_lengths = {};
ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C), ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C),
static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C), static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C),
...@@ -109,7 +108,6 @@ struct Epilogue ...@@ -109,7 +108,6 @@ struct Epilogue
1, 1,
static_cast<int>(prob.X * prob.C), static_cast<int>(prob.X * prob.C),
static_cast<int>(prob.C)}; static_cast<int>(prob.C)};
ck::Array<ck::index_t, 5> d_strides = {};
ck::Array<ck::index_t, 2> conv_filter_strides = {2, 2}; ck::Array<ck::index_t, 2> conv_filter_strides = {2, 2};
ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1}; ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1};
......
...@@ -92,7 +92,6 @@ struct Epilogue ...@@ -92,7 +92,6 @@ struct Epilogue
static_cast<int>(prob.C), static_cast<int>(prob.C),
static_cast<int>(prob.Y), static_cast<int>(prob.Y),
static_cast<int>(prob.X)}; static_cast<int>(prob.X)};
ck::Array<ck::index_t, 5> d_lengths = {};
ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C), ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C),
static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C), static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C),
...@@ -109,7 +108,6 @@ struct Epilogue ...@@ -109,7 +108,6 @@ struct Epilogue
1, 1,
static_cast<int>(prob.X * prob.C), static_cast<int>(prob.X * prob.C),
static_cast<int>(prob.C)}; static_cast<int>(prob.C)};
ck::Array<ck::index_t, 5> d_strides = {};
ck::Array<ck::index_t, 2> conv_filter_strides = {1, 1}; ck::Array<ck::index_t, 2> conv_filter_strides = {1, 1};
ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1}; ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1};
......
...@@ -92,7 +92,6 @@ struct Epilogue ...@@ -92,7 +92,6 @@ struct Epilogue
static_cast<int>(prob.C), static_cast<int>(prob.C),
static_cast<int>(prob.Y), static_cast<int>(prob.Y),
static_cast<int>(prob.X)}; static_cast<int>(prob.X)};
ck::Array<ck::index_t, 5> d_lengths = {};
ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C), ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C),
static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C), static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C),
...@@ -109,7 +108,6 @@ struct Epilogue ...@@ -109,7 +108,6 @@ struct Epilogue
1, 1,
static_cast<int>(prob.X * prob.C), static_cast<int>(prob.X * prob.C),
static_cast<int>(prob.C)}; static_cast<int>(prob.C)};
ck::Array<ck::index_t, 5> d_strides = {};
ck::Array<ck::index_t, 2> conv_filter_strides = {2, 2}; ck::Array<ck::index_t, 2> conv_filter_strides = {2, 2};
ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1}; ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1};
......
...@@ -92,7 +92,6 @@ struct Epilogue ...@@ -92,7 +92,6 @@ struct Epilogue
static_cast<int>(prob.C), static_cast<int>(prob.C),
static_cast<int>(prob.Y), static_cast<int>(prob.Y),
static_cast<int>(prob.X)}; static_cast<int>(prob.X)};
ck::Array<ck::index_t, 5> d_lengths = {};
ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C), ck::Array<ck::index_t, 5> in_strides{static_cast<int>(prob.C),
static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C), static_cast<int>(prob.Hi * prob.Wi * prob.G * prob.C),
...@@ -109,7 +108,6 @@ struct Epilogue ...@@ -109,7 +108,6 @@ struct Epilogue
1, 1,
static_cast<int>(prob.X * prob.C), static_cast<int>(prob.X * prob.C),
static_cast<int>(prob.C)}; static_cast<int>(prob.C)};
ck::Array<ck::index_t, 5> d_strides = {};
ck::Array<ck::index_t, 2> conv_filter_strides = {1, 1}; ck::Array<ck::index_t, 2> conv_filter_strides = {1, 1};
ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1}; ck::Array<ck::index_t, 2> conv_filter_dilations = {1, 1};
......
...@@ -118,4 +118,4 @@ void kernel::launch(hipStream_t stream, ...@@ -118,4 +118,4 @@ void kernel::launch(hipStream_t stream,
launch_kernel(impl->fun, stream, global, local, kernargs.data(), size); launch_kernel(impl->fun, stream, global, local, kernargs.data(), size);
} }
} // namespace rtc } // namespace rtc
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment