".github/vscode:/vscode.git/clone" did not exist on "69121432d8c25697828beb5bd707e27386276dce"
Commit 09d4c3a4 authored by illsilin's avatar illsilin
Browse files

merge from public repo

parents 171ed358 8e4c3fb1
......@@ -26,11 +26,15 @@ set(version 1.1.0)
project(composable_kernel VERSION ${version} LANGUAGES CXX HIP)
include(CTest)
# Usage: for customized Python location cmake -DCK_USE_ALTERNATIVE_PYTHON="/opt/Python-3.8.13/bin/python3.8"
# CK Codegen requires dataclass which is added in Python 3.7
# Python version 3.8 is required for general good practice as it is default for Ubuntu 20.04
if(NOT CK_USE_ALTERNATIVE_PYTHON)
find_package(Python3 3.6 COMPONENTS Interpreter REQUIRED)
find_package(Python3 3.8 COMPONENTS Interpreter REQUIRED)
else()
message("Using alternative python version")
set(EXTRA_PYTHON_PATH)
# this is overly restrictive, we may need to be more flexible on the following
string(REPLACE "/bin/python3.8" "" EXTRA_PYTHON_PATH "${CK_USE_ALTERNATIVE_PYTHON}")
message("alternative python path is: ${EXTRA_PYTHON_PATH}")
find_package(Python3 3.6 COMPONENTS Interpreter REQUIRED)
......
......@@ -23,6 +23,11 @@ RUN if [ "$ROCMVERSION" != "6.3" ]; then \
wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
sh -c "echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] $DEB_ROCM_REPO focal main > /etc/apt/sources.list.d/rocm.list" && \
sh -c 'echo deb [arch=amd64 signed-by=/etc/apt/trusted.gpg.d/rocm-keyring.gpg] https://repo.radeon.com/amdgpu/$ROCMVERSION/ubuntu focal main > /etc/apt/sources.list.d/amdgpu.list'; \
elif [ "$ROCMVERSION" = "6.3" ] && [ "$compiler_version" = "rc1" ]; then \
sh -c "wget http://artifactory-cdn.amd.com/artifactory/list/amdgpu-deb/amdgpu-install-internal_6.3.0.1-20.04-1_all.deb --no-check-certificate" && \
apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install dialog libpopt0 rsync && DEBIAN_FRONTEND=noninteractive apt-get install ./amdgpu-install-internal_6.3.0.1-20.04-1_all.deb && \
sh -c 'echo deb [arch=amd64 trusted=yes] http://compute-artifactory.amd.com/artifactory/list/rocm-release-archive-20.04-deb/ 6.3.0.1 rel-5 > /etc/apt/sources.list.d/rocm-build.list' && \
amdgpu-repo --amdgpu-build=2033700; \
fi
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
......@@ -130,6 +135,8 @@ ENV compiler_commit=$compiler_commit
RUN sh -c "echo compiler version = '$compiler_version'"
RUN sh -c "echo compiler commit = '$compiler_commit'"
ARG DISABLE_CACHE=0
RUN if ( [ "$compiler_version" = "amd-staging" ] || [ "$compiler_version" = "amd-mainline-open" ] ) && [ "$compiler_commit" = "" ]; then \
git clone -b "$compiler_version" https://github.com/ROCm/llvm-project.git && \
cd llvm-project && mkdir build && cd build && \
......
......@@ -94,13 +94,21 @@ def getDockerImage(Map conf=[:]){
env.DOCKER_BUILDKIT=1
def prefixpath = conf.get("prefixpath", "/opt/rocm")
def no_cache = conf.get("no_cache", false)
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${prefixpath} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${prefixpath} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' --build-arg DISABLE_CACHE='git rev-parse ${params.COMPILER_VERSION}' "
if(no_cache)
{
dockerArgs = dockerArgs + " --no-cache "
}
echo "Docker Args: ${dockerArgs}"
def image = getDockerImageName()
def image
if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){
image = conf.get("docker_name", "")
echo "Using legacy docker: ${image}"
}
else{
image = getDockerImageName()
echo "Using default docker: ${image}"
}
//Check if image exists
def retimage
try
......@@ -124,8 +132,10 @@ def buildDocker(install_prefix){
checkout scm
def image_name = getDockerImageName()
echo "Building Docker for ${image_name}"
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
def dockerArgs = "--build-arg BUILDKIT_INLINE_CACHE=1 --build-arg PREFIX=${install_prefix} --build-arg CK_SCCACHE='${env.CK_SCCACHE}' --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' --build-arg DISABLE_CACHE='git rev-parse ${params.COMPILER_VERSION}' "
if(params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){
dockerArgs = dockerArgs + " --no-cache "
}
echo "Build Args: ${dockerArgs}"
try{
if(params.BUILD_DOCKER){
......@@ -259,6 +269,7 @@ def cmake_build(Map conf=[:]){
""")
sh cmd3
}
// reduce parallelism when compiling, clang uses too much memory
def nt = nthreads()
def cmd
......@@ -273,7 +284,7 @@ def cmake_build(Map conf=[:]){
}
else{
setup_cmd = conf.get("setup_cmd", "${cmake_envs} cmake ${setup_args} .. ")
build_cmd = conf.get("build_cmd", "${build_envs} dumb-init make -j${nt} ${config_targets}")
build_cmd = conf.get("build_cmd", "${build_envs} make -j${nt} ${config_targets}")
}
cmd = conf.get("cmd", """
${setup_cmd}
......@@ -292,8 +303,8 @@ def cmake_build(Map conf=[:]){
dir("build"){
//build CK
sh cmd
//run tests
if(!setup_args.contains("NO_CK_BUILD")){
//run tests except when NO_CK_BUILD or BUILD_LEGACY_OS are set
if(!setup_args.contains("NO_CK_BUILD") && !params.BUILD_LEGACY_OS){
if (setup_args.contains("gfx90a") && params.NINJA_BUILD_TRACE){
sh "/ninjatracing/ninjatracing .ninja_log > ck_build_trace.json"
archiveArtifacts "ck_build_trace.json"
......@@ -330,7 +341,15 @@ def buildHipClangJob(Map conf=[:]){
env.HSA_ENABLE_SDMA=0
checkout scm
def image = getDockerImageName()
def image
if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){
image = conf.get("docker_name", "")
echo "Using legacy docker: ${image}"
}
else{
image = getDockerImageName()
echo "Using default docker: ${image}"
}
def prefixpath = conf.get("prefixpath", "/opt/rocm")
// Jenkins is complaining about the render group
......@@ -352,7 +371,7 @@ def buildHipClangJob(Map conf=[:]){
def retimage
(retimage, image) = getDockerImage(conf)
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
gitStatusWrapper(credentialsId: "${env.ck_git_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
timeout(time: 48, unit: 'HOURS')
{
......@@ -407,7 +426,7 @@ def runCKProfiler(Map conf=[:]){
def variant = env.STAGE_NAME
def retimage
gitStatusWrapper(credentialsId: "${status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
gitStatusWrapper(credentialsId: "${env.ck_git_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
try {
(retimage, image) = getDockerImage(conf)
withDockerContainer(image: image, args: dockerOpts) {
......@@ -512,7 +531,16 @@ def Build_CK(Map conf=[:]){
env.DOCKER_BUILDKIT=1
checkout scm
def image = getDockerImageName()
def image
if ( params.BUILD_LEGACY_OS && conf.get("docker_name", "") != "" ){
image = conf.get("docker_name", "")
echo "Using legacy docker: ${image}"
}
else{
image = getDockerImageName()
echo "Using default docker: ${image}"
}
def prefixpath = conf.get("prefixpath", "/opt/rocm")
// Jenkins is complaining about the render group
......@@ -524,6 +552,9 @@ def Build_CK(Map conf=[:]){
if (params.COMPILER_VERSION == "amd-staging" || params.COMPILER_VERSION == "amd-mainline-open" || params.COMPILER_COMMIT != ""){
dockerOpts = dockerOpts + " --env HIP_CLANG_PATH='/llvm-project/build/bin' "
}
if(params.BUILD_LEGACY_OS){
dockerOpts = dockerOpts + " --env LD_LIBRARY_PATH='/opt/Python-3.8.13/lib' "
}
def video_id = sh(returnStdout: true, script: 'getent group video | cut -d: -f3')
def render_id = sh(returnStdout: true, script: 'getent group render | cut -d: -f3')
dockerOpts = dockerOpts + " --group-add=${video_id} --group-add=${render_id} "
......@@ -532,7 +563,7 @@ def Build_CK(Map conf=[:]){
def variant = env.STAGE_NAME
def retimage
gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
gitStatusWrapper(credentialsId: "${env.ck_git_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
try {
(retimage, image) = getDockerImage(conf)
withDockerContainer(image: image, args: dockerOpts) {
......@@ -637,7 +668,7 @@ def process_results(Map conf=[:]){
def variant = env.STAGE_NAME
def retimage
gitStatusWrapper(credentialsId: "${env.status_wrapper_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
gitStatusWrapper(credentialsId: "${env.ck_git_creds}", gitHubContext: "Jenkins - ${variant}", account: 'ROCm', repo: 'composable_kernel-internal') {
try {
(retimage, image) = getDockerImage(conf)
}
......@@ -765,9 +796,13 @@ pipeline {
defaultValue: false,
description: "Run the grouped conv large cases tests (default: OFF)")
booleanParam(
name: "RUN_CK_TILE_TESTS",
name: "RUN_CK_TILE_FMHA_TESTS",
defaultValue: false,
description: "Run the ck_tile tests (default: OFF)")
description: "Run the ck_tile FMHA tests (default: OFF)")
booleanParam(
name: "RUN_CK_TILE_GEMM_TESTS",
defaultValue: false,
description: "Run the ck_tile GEMM tests (default: OFF)")
booleanParam(
name: "BUILD_INSTANCES_ONLY",
defaultValue: false,
......@@ -780,6 +815,10 @@ pipeline {
name: "NINJA_BUILD_TRACE",
defaultValue: false,
description: "Generate a ninja build trace (default: OFF)")
booleanParam(
name: "BUILD_LEGACY_OS",
defaultValue: false,
description: "Try building CK with legacy OS dockers: RHEL8 and SLES15 (default: OFF)")
}
environment{
dbuser = "${dbuser}"
......@@ -788,7 +827,7 @@ pipeline {
dbsshport = "${dbsshport}"
dbsshuser = "${dbsshuser}"
dbsshpassword = "${dbsshpassword}"
status_wrapper_creds = "${status_wrapper_creds}"
ck_git_creds = "${ck_git_creds}"
gerrit_cred="${gerrit_cred}"
DOCKER_BUILDKIT = "1"
}
......@@ -884,15 +923,15 @@ pipeline {
}
}
}
stage("Run CK_TILE Tests")
stage("Run CK_TILE_FMHA Tests")
{
parallel
{
stage("Run CK_TILE Tests on gfx90a")
stage("Run CK_TILE_FMHA Tests on gfx90a")
{
when {
beforeAgent true
expression { params.RUN_CK_TILE_TESTS.toBoolean() }
expression { params.RUN_CK_TILE_FMHA_TESTS.toBoolean() }
}
agent{ label rocmnode("gfx90a") }
environment{
......@@ -907,11 +946,11 @@ pipeline {
cleanWs()
}
}
stage("Run CK_TILE Tests on gfx942")
stage("Run CK_TILE_FMHA Tests on gfx942")
{
when {
beforeAgent true
expression { params.RUN_CK_TILE_TESTS.toBoolean() }
expression { params.RUN_CK_TILE_FMHA_TESTS.toBoolean() }
}
agent{ label rocmnode("gfx942") }
environment{
......@@ -928,21 +967,103 @@ pipeline {
}
}
}
stage("Run CK_TILE_GEMM Tests")
{
parallel
{
stage("Run CK_TILE_GEMM Tests on gfx90a")
{
when {
beforeAgent true
expression { params.RUN_CK_TILE_GEMM_TESTS.toBoolean() }
}
agent{ label rocmnode("gfx90a") }
environment{
setup_args = "NO_CK_BUILD"
execute_args = """ ../script/cmake-ck-dev.sh ../ gfx90a && \
make -j64 tile_example_gemm_basic && \
cd ../ &&
example/ck_tile/03_gemm/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx90a """
}
steps{
buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
cleanWs()
}
}
stage("Run CK_TILE_GEMM Tests on gfx942")
{
when {
beforeAgent true
expression { params.RUN_CK_TILE_GEMM_TESTS.toBoolean() }
}
agent{ label rocmnode("gfx942") }
environment{
setup_args = "NO_CK_BUILD"
execute_args = """ ../script/cmake-ck-dev.sh ../ gfx942 && \
make -j64 tile_example_gemm_basic && \
cd ../ &&
example/ck_tile/03_gemm/script/run_full_test.sh "CI_${params.COMPILER_VERSION}" "${env.BRANCH_NAME}" "${NODE_NAME}" gfx942 """
}
steps{
buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args)
cleanWs()
}
}
}
}
stage("Build CK and run Tests")
{
parallel
{
stage("Build CK with RHEL8")
{
when {
beforeAgent true
expression { params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx90a") }
environment{
def docker_name = "${env.CK_DOCKERHUB_PRIVATE}:ck_rhel8_rocm6.3"
setup_args = """ -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args = " "
}
steps{
Build_CK_and_Reboot(setup_args: setup_args, config_targets: " ", no_reboot:true, build_type: 'Release', docker_name: docker_name)
cleanWs()
}
}
stage("Build CK with SLES15")
{
when {
beforeAgent true
expression { params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx90a") }
environment{
def docker_name = "${env.CK_DOCKERHUB_PRIVATE}:ck_sles15_rocm6.3"
setup_args = """ -DGPU_TARGETS="gfx942" \
-DCMAKE_CXX_FLAGS=" -O3 " \
-DCK_USE_ALTERNATIVE_PYTHON=/opt/Python-3.8.13/bin/python3.8 """
execute_args = " "
}
steps{
Build_CK_and_Reboot(setup_args: setup_args, config_targets: " ", no_reboot:true, build_type: 'Release', docker_name: docker_name)
cleanWs()
}
}
stage("Build CK for all gfx9 targets")
{
when {
beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() }
expression { params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx90a") }
environment{
setup_args = """ -DCMAKE_INSTALL_PREFIX=../install \
-DGPU_TARGETS="gfx908;gfx90a;gfx940;gfx941;gfx942" \
-DCMAKE_EXE_LINKER_FLAGS=" -L ${env.WORKSPACE}/script -T hip_fatbin_insert " \
-DCMAKE_CXX_FLAGS=" -O3 " """
execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && \
cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" \
......@@ -959,7 +1080,7 @@ pipeline {
{
when {
beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() }
expression { params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx942") }
environment{
......@@ -979,7 +1100,7 @@ pipeline {
{
when {
beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() }
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx90a") }
environment{
......@@ -999,7 +1120,7 @@ pipeline {
{
when {
beforeAgent true
expression { params.BUILD_INSTANCES_ONLY.toBoolean() && !params.RUN_FULL_QA.toBoolean() }
expression { params.BUILD_INSTANCES_ONLY.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx90a") }
environment{
......@@ -1018,7 +1139,7 @@ pipeline {
{
when {
beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() }
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx1030") }
environment{
......@@ -1038,7 +1159,7 @@ pipeline {
{
when {
beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() }
expression { !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx1101") }
environment{
......@@ -1058,7 +1179,7 @@ pipeline {
{
when {
beforeAgent true
expression { params.BUILD_GFX12.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() }
expression { params.BUILD_GFX12.toBoolean() && !params.RUN_FULL_QA.toBoolean() && !params.BUILD_INSTANCES_ONLY.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent{ label rocmnode("gfx1201") }
environment{
......@@ -1085,7 +1206,7 @@ pipeline {
{
when {
beforeAgent true
expression { params.RUN_PERFORMANCE_TESTS.toBoolean() }
expression { params.RUN_PERFORMANCE_TESTS.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
options { retry(1) }
agent{ label rocmnode("gfx90a")}
......@@ -1106,7 +1227,7 @@ pipeline {
stage("Process results"){
when {
beforeAgent true
expression { params.RUN_PERFORMANCE_TESTS.toBoolean() }
expression { params.RUN_PERFORMANCE_TESTS.toBoolean() && !params.BUILD_LEGACY_OS.toBoolean() }
}
agent { label 'mici' }
steps{
......
......@@ -6,6 +6,9 @@ if(NOT INSTANCES_ONLY)
set_source_files_properties(${TEST_SRC} PROPERTIES LANGUAGE HIP)
get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE)
add_executable(codegen_test_${BASE_NAME} ${TEST_SRC})
if(CK_USE_ALTERNATIVE_PYTHON)
target_link_options(codegen_test_${BASE_NAME} PRIVATE -lstdc++fs)
endif()
add_dependencies(codegen codegen_test_${BASE_NAME})
add_dependencies(tests codegen_test_${BASE_NAME})
add_dependencies(check codegen_test_${BASE_NAME})
......
......@@ -2,14 +2,14 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL
#include <rtc/kernel.hpp>
#include <filesystem>
#include <ck/filesystem.hpp>
#include <string>
namespace rtc {
struct src_file
{
std::filesystem::path path;
CK::fs::path path;
std::string_view content;
};
......
......@@ -2,13 +2,13 @@
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_TMP_DIR
#include <string>
#include <filesystem>
#include <ck/filesystem.hpp>
namespace rtc {
struct tmp_dir
{
std::filesystem::path path;
CK::fs::path path;
tmp_dir(const std::string& prefix = "");
void execute(const std::string& cmd) const;
......
......@@ -70,9 +70,9 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
for(const auto& src : srcs)
{
std::filesystem::path full_path = td.path / src.path;
std::filesystem::path parent_path = full_path.parent_path();
std::filesystem::create_directories(parent_path);
CK::fs::path full_path = td.path / src.path;
CK::fs::path parent_path = full_path.parent_path();
CK::fs::create_directories(parent_path);
write_string(full_path.string(), src.content);
if(src.path.extension().string() == ".cpp")
{
......@@ -86,7 +86,7 @@ kernel compile_kernel(const std::vector<src_file>& srcs, compile_options options
td.execute(compiler() + options.flags);
auto out_path = td.path / out;
if(not std::filesystem::exists(out_path))
if(not CK::fs::exists(out_path))
throw std::runtime_error("Output file missing: " + out);
auto obj = read_buffer(out_path.string());
......
......@@ -31,10 +31,10 @@ std::string unique_string(const std::string& prefix)
}
tmp_dir::tmp_dir(const std::string& prefix)
: path(std::filesystem::temp_directory_path() /
: path(CK::fs::temp_directory_path() /
unique_string(prefix.empty() ? "ck-rtc" : "ck-rtc-" + prefix))
{
std::filesystem::create_directories(this->path);
CK::fs::create_directories(this->path);
}
void tmp_dir::execute(const std::string& cmd) const
......@@ -43,6 +43,6 @@ void tmp_dir::execute(const std::string& cmd) const
std::system(s.c_str());
}
tmp_dir::~tmp_dir() { std::filesystem::remove_all(this->path); }
tmp_dir::~tmp_dir() { CK::fs::remove_all(this->path); }
} // namespace rtc
rocm-docs-core==1.7.2
sphinxcontrib-bibtex==2.6.2
rocm-docs-core==1.8.2
sphinxcontrib-bibtex==2.6.3
......@@ -103,7 +103,7 @@ requests==2.32.3
# via
# pygithub
# sphinx
rocm-docs-core==1.7.2
rocm-docs-core==1.8.2
# via -r requirements.in
six==1.16.0
# via pybtex
......@@ -137,7 +137,7 @@ sphinx-notfound-page==1.0.3
# via rocm-docs-core
sphinxcontrib-applehelp==2.0.0
# via sphinx
sphinxcontrib-bibtex==2.6.2
sphinxcontrib-bibtex==2.6.3
# via -r requirements.in
sphinxcontrib-devhelp==2.0.0
# via sphinx
......
......@@ -305,6 +305,14 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
}
#endif
}
else
{
// When the Problem Type and Problem Size does not fit.
std::cerr << gemm.GetTypeString() << ": the instance does not support the problem config."
<< std::endl;
return true;
}
std::size_t flop = 2_uz * M * N * K;
std::size_t num_btype =
......
......@@ -161,18 +161,6 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
a_m_k.GenerateTensorValue(GeneratorTensor_3<ADataType>{0.0, 1.0});
b_k_n.GenerateTensorValue(GeneratorTensor_3<BDataType>{-0.5, 0.5});
}
#if 0
printf("B matrix:\n");
for (int in = 0; in < N; in++)
{
for (int ik = 0; ik < K; ik++)
{
printf("%02x ", *(reinterpret_cast<uint8_t*>(&b_k_n(ik,in))));
if(ik%8==7) printf("|");
}
printf("\n");
}
#endif
Tensor<CDataType> c_m_n_host_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
Tensor<CDataType> c_m_n_device_result(f_host_tensor_descriptor(M, N, StrideC, CLayout{}));
......@@ -272,7 +260,8 @@ bool run_gemm(const ProblemType& problem_size, const ExecutionConfig& config)
if(config.time_kernel)
{
ave_time = invoker.Run(argument, StreamConfig{nullptr, config.time_kernel});
ave_time =
invoker.Run(argument, StreamConfig{nullptr, config.time_kernel, 0, 5, 10, true, 4});
std::size_t flop = 2_uz * M * N * K;
std::size_t num_btype =
......
add_example_executable(example_complex_contraction_bilinear_xdl_fp32 complex_contraction_bilinear_xdl_fp32.cpp)
add_example_executable(example_complex_contraction_bilinear_xdl_fp64 complex_contraction_bilinear_xdl_fp64.cpp)
# Instructions for ```example_complex_contraction_bilinear_xdl_fp32```
## Run
```bash
#arg1: verification (0=no, 1=yes)
#arg2: initialization (0=no init, 1=integer value, 2=decimal value)
#arg3: time kernel (0=no, 1=yes)
./bin/example_contraction_bilinear_xdl_fp32 1 1 1
```
This diff is collapsed.
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "common_instances.hpp"
using ADataType = F32;
using BDataType = F32;
using AccDataType = F32;
using CShuffleDataType = F32;
using DDataType = F32;
using DsDataType = ck::Tuple<DDataType>;
using EDataType = F32;
using ComputeDataType = F32;
static constexpr ck::index_t NumDimM = 2;
static constexpr ck::index_t NumDimN = 2;
static constexpr ck::index_t NumDimK = 2;
using AElementOp = ck::tensor_operation::element_wise::PassThrough;
using BElementOp = ck::tensor_operation::element_wise::PassThrough;
using CDEElementOp = ck::tensor_operation::element_wise::Bilinear;
using DeviceOpInstanceKKNN = DeviceOpInstanceKK_Generic<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstanceKNNN = DeviceOpInstanceKN_Generic<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstanceMKNN = DeviceOpInstanceMK_Generic<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstanceMNNN = DeviceOpInstanceMN_Generic<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstance = DeviceOpInstanceKKNN;
#include "run_complex_contraction_bilinear_example.inc"
int main(int argc, char* argv[]) { return run_complex_contraction_bilinear_example(argc, argv); }
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "common_instances.hpp"
using ADataType = F64;
using BDataType = F64;
using AccDataType = F64;
using CShuffleDataType = F64;
using DDataType = F64;
using DsDataType = ck::Tuple<DDataType>;
using EDataType = F64;
using ComputeDataType = F64;
static constexpr ck::index_t NumDimM = 2;
static constexpr ck::index_t NumDimN = 2;
static constexpr ck::index_t NumDimK = 2;
using AElementOp = ck::tensor_operation::element_wise::PassThrough;
using BElementOp = ck::tensor_operation::element_wise::PassThrough;
using CDEElementOp = ck::tensor_operation::element_wise::Bilinear;
using DeviceOpInstanceKKNN = DeviceOpInstanceKK_FP64<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstanceKNNN = DeviceOpInstanceKN_FP64<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstanceMKNN = DeviceOpInstanceMK_FP64<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstanceMNNN = DeviceOpInstanceMN_FP64<NumDimM,
NumDimN,
NumDimK,
ADataType,
BDataType,
AccDataType,
CShuffleDataType,
DsDataType,
EDataType,
ComputeDataType,
AElementOp,
BElementOp,
CDEElementOp>;
using DeviceOpInstance = DeviceOpInstanceKKNN;
#include "run_complex_contraction_bilinear_example.inc"
int main(int argc, char* argv[]) { return run_complex_contraction_bilinear_example(argc, argv); }
......@@ -22,12 +22,20 @@ string(REPLACE ";" "," FMHA_FWD_APIS "${FMHA_FWD_ENABLE_APIS}")
execute_process(
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/generate.py
--api ${FMHA_FWD_APIS} --list_blobs ${CMAKE_CURRENT_BINARY_DIR}/fwd_blob_list.txt
RESULT_VARIABLE ret
)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to genrate a list of FWD kernels via Python.")
endif()
execute_process(
COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_LIST_DIR}/generate.py
--api bwd --list_blobs ${CMAKE_CURRENT_BINARY_DIR}/bwd_blob_list.txt --receipt 3
RESULT_VARIABLE ret
)
if(ret AND NOT ret EQUAL 0)
message( FATAL_ERROR "CK Tile FMHA FAILED to genrate a list of BWD kernels via Python.")
endif()
# NOTE: for cmake, the FMHA_FWD_GEN_BLOBS/FMHA_BWD_GEN_BLOBS files must be in the same directory
# as current cmake list, otherwise will not figure out the dependency properly
......
......@@ -99,13 +99,26 @@ auto create_args(int argc, char* argv[])
// different threshold for different dtype
template <typename DataType>
auto get_elimit(int /*init_method*/)
auto get_elimit(ck_tile::index_t /*hdim_q*/, ck_tile::index_t /*hdim_v*/)
{
double rtol = 1e-2;
double atol = 1e-2;
return ck_tile::make_tuple(rtol, atol);
}
template <>
auto get_elimit<ck_tile::bf16_t>(ck_tile::index_t hdim_q, ck_tile::index_t hdim_v)
{
double rtol = 1e-2;
double atol = 1e-2;
if(hdim_q > 128 && hdim_v > 128) // 3.2 for RTZ/1.5 for RTN
{
rtol = 3.2e-2;
atol = 3.2e-2;
}
return ck_tile::make_tuple(rtol, atol);
}
template <typename DataType>
bool run(const ck_tile::ArgParser& arg_parser)
{
......@@ -899,7 +912,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
}
// clang-format on
auto [rtol, atol] = get_elimit<DataType>(init_method);
auto [rtol, atol] = get_elimit<DataType>(hdim_q, hdim_v);
bool dq_cur_pass = ck_tile::check_err(dq_host_result,
dq_host_ref,
std::string("Error: QGrad Incorrect results!"),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment