"test/git@developer.sourcefind.cn:gaoqiong/migraphx.git" did not exist on "59b0d9e64df9448d44d7b80f792bba99e7650bb3"
Commit 78e355fd authored by gaoqiong's avatar gaoqiong
Browse files

onnxruntime

parent fae08684
Pipeline #494 failed with stages
in 0 seconds
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <functional>
#include <numeric>
#include <iomanip>
#include <iostream>
#include <vector>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_reduce.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp"
using XDataType = float;
using YDataType = float;
using AccDataType = float;
using ScaleDataType = AccDataType;
using BiasDataType = AccDataType;
using MeanVarDataType = AccDataType;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
constexpr int Rank = 4;
constexpr int NumBatchNormReduceDim = 3;
const double epsilon = std::numeric_limits<float>::epsilon();
const double averageFactor = 0.1;
struct SimpleDeviceMem
{
SimpleDeviceMem() = delete;
SimpleDeviceMem(std::size_t mem_size) : p_mem_{}
{
(void)hipMalloc(static_cast<void**>(&p_mem_), mem_size);
}
void* GetDeviceBuffer() { return p_mem_; }
~SimpleDeviceMem() { (void)hipFree(p_mem_); }
void* p_mem_;
};
int main(int argc, char* argv[])
{
std::array<ck::index_t, Rank> xyLengths{16, 8, 128, 256};
std::array<ck::index_t, Rank> xyStrides{8 * 128 * 256, 128 * 256, 256, 1};
std::array<ck::index_t, Rank - NumBatchNormReduceDim> scaleBiasMeanVarLengths{256};
std::array<ck::index_t, Rank - NumBatchNormReduceDim> scaleBiasMeanVarStrides{1};
std::array<int, NumBatchNormReduceDim> reduceDims{0, 1, 2};
ck::index_t numXYElement =
std::accumulate(xyLengths.begin(), xyLengths.end(), 1, std::multiplies<ck::index_t>());
ck::index_t numScaleBiasMeanVarElement = std::accumulate(scaleBiasMeanVarLengths.begin(),
scaleBiasMeanVarLengths.end(),
1,
std::multiplies<ck::index_t>());
SimpleDeviceMem x(sizeof(XDataType) * numXYElement);
SimpleDeviceMem y(sizeof(YDataType) * numXYElement);
SimpleDeviceMem scale(sizeof(ScaleDataType) * numScaleBiasMeanVarElement);
SimpleDeviceMem bias(sizeof(BiasDataType) * numScaleBiasMeanVarElement);
SimpleDeviceMem mean(sizeof(MeanVarDataType) * numScaleBiasMeanVarElement);
SimpleDeviceMem invVariance(sizeof(MeanVarDataType) * numScaleBiasMeanVarElement);
using DeviceOp = ck::tensor_operation::device::DeviceBatchNormFwd<XDataType,
YDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
PassThrough,
Rank,
NumBatchNormReduceDim>;
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
std::string best_op_name;
bool found = false;
int best_op_id = -1;
float best_ave_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
// profile device operation instances
std::cout << "Run all instances and do timing" << std::endl;
for(int i = 0; i < op_ptrs.size(); ++i)
{
auto& op_ptr = op_ptrs[i];
auto argument_ptr = op_ptr->MakeArgumentPointer(xyLengths,
xyStrides,
xyStrides,
reduceDims,
scaleBiasMeanVarLengths,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
x.GetDeviceBuffer(),
scale.GetDeviceBuffer(),
bias.GetDeviceBuffer(),
epsilon,
PassThrough{},
y.GetDeviceBuffer(),
mean.GetDeviceBuffer(),
invVariance.GetDeviceBuffer(),
averageFactor,
nullptr,
nullptr);
auto invoker_ptr = op_ptr->MakeInvokerPointer();
std::string op_name = op_ptr->GetTypeString();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
size_t workspace_sz = op_ptr->GetWorkSpaceSize(argument_ptr.get());
SimpleDeviceMem workspace(workspace_sz);
op_ptr->SetWorkSpacePointer(argument_ptr.get(), workspace.GetDeviceBuffer());
float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true});
std::size_t num_bytes =
numXYElement * (sizeof(XDataType) + sizeof(YDataType)) +
numScaleBiasMeanVarElement * (sizeof(ScaleDataType) + sizeof(BiasDataType) +
sizeof(MeanVarDataType) + sizeof(MeanVarDataType));
float gb_per_sec = num_bytes / 1.E6 / ave_time;
std::cout << "Perf: " << std::setw(10) << ave_time << " ms, " << gb_per_sec << " GB/s, "
<< op_name << std::endl;
if(ave_time < best_ave_time)
{
found = true;
best_op_id = i;
best_op_name = op_name;
best_ave_time = ave_time;
best_gb_per_sec = gb_per_sec;
}
}
else
{
std::cout << op_name << " does not support this problem" << std::endl;
}
}
if(found)
{
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_gb_per_sec << " GB/s, "
<< best_op_name << std::endl;
// run the best intance
auto& op_ptr = op_ptrs[best_op_id];
std::cout << "Run the best instance without timing: " << op_ptr->GetTypeString()
<< std::endl;
auto argument_ptr = op_ptr->MakeArgumentPointer(xyLengths,
xyStrides,
xyStrides,
reduceDims,
scaleBiasMeanVarLengths,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
x.GetDeviceBuffer(),
scale.GetDeviceBuffer(),
bias.GetDeviceBuffer(),
epsilon,
PassThrough{},
y.GetDeviceBuffer(),
mean.GetDeviceBuffer(),
invVariance.GetDeviceBuffer(),
averageFactor,
nullptr,
nullptr);
auto invoker_ptr = op_ptr->MakeInvokerPointer();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, false});
}
std::cout << "Done" << std::endl;
}
return 0;
}
add_executable(client_batchnorm_fwd_instance_id batchnorm_fwd_instance_id.cpp)
target_link_libraries(client_batchnorm_fwd_instance_id PRIVATE composable_kernel::device_operations)
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <functional>
#include <numeric>
#include <iomanip>
#include <iostream>
#include <vector>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_reduce.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp"
using XDataType = float;
using YDataType = float;
using AccDataType = float;
using ScaleDataType = AccDataType;
using BiasDataType = AccDataType;
using MeanVarDataType = AccDataType;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
constexpr int Rank = 4;
constexpr int NumBatchNormReduceDim = 3;
const double epsilon = std::numeric_limits<float>::epsilon();
const double averageFactor = 0.1;
struct SimpleDeviceMem
{
SimpleDeviceMem() = delete;
SimpleDeviceMem(std::size_t mem_size) : p_mem_{}
{
(void)hipMalloc(static_cast<void**>(&p_mem_), mem_size);
}
void* GetDeviceBuffer() { return p_mem_; }
~SimpleDeviceMem() { (void)hipFree(p_mem_); }
void* p_mem_;
};
// In the actual application, the instance index and name are usually from the perf db
static int instance_index = -1;
static std::string instance_name;
int main(int argc, char* argv[])
{
std::array<ck::index_t, Rank> xyLengths{16, 8, 128, 256};
std::array<ck::index_t, Rank> xyStrides{8 * 128 * 256, 128 * 256, 256, 1};
std::array<ck::index_t, Rank - NumBatchNormReduceDim> scaleBiasMeanVarLengths{256};
std::array<ck::index_t, Rank - NumBatchNormReduceDim> scaleBiasMeanVarStrides{1};
std::array<int, NumBatchNormReduceDim> reduceDims{0, 1, 2};
ck::index_t numXYElement =
std::accumulate(xyLengths.begin(), xyLengths.end(), 1, std::multiplies<ck::index_t>());
ck::index_t numScaleBiasMeanVarElement = std::accumulate(scaleBiasMeanVarLengths.begin(),
scaleBiasMeanVarLengths.end(),
1,
std::multiplies<ck::index_t>());
SimpleDeviceMem x(sizeof(XDataType) * numXYElement);
SimpleDeviceMem y(sizeof(YDataType) * numXYElement);
SimpleDeviceMem scale(sizeof(ScaleDataType) * numScaleBiasMeanVarElement);
SimpleDeviceMem bias(sizeof(BiasDataType) * numScaleBiasMeanVarElement);
SimpleDeviceMem mean(sizeof(MeanVarDataType) * numScaleBiasMeanVarElement);
SimpleDeviceMem invVariance(sizeof(MeanVarDataType) * numScaleBiasMeanVarElement);
using DeviceOp = ck::tensor_operation::device::DeviceBatchNormFwd<XDataType,
YDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
PassThrough,
Rank,
NumBatchNormReduceDim>;
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
std::cout << "found " << op_ptrs.size() << " instances" << std::endl;
bool found = false;
int best_op_index = -1;
float best_ave_time = std::numeric_limits<float>::max();
// profile device operation instances and save the best performant instance index and instance
// name
std::cout << "Run all instances and do timing" << std::endl;
for(int i = 0; i < op_ptrs.size(); ++i)
{
auto& op_ptr = op_ptrs[i];
auto argument_ptr = op_ptr->MakeArgumentPointer(xyLengths,
xyStrides,
xyStrides,
reduceDims,
scaleBiasMeanVarLengths,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
x.GetDeviceBuffer(),
scale.GetDeviceBuffer(),
bias.GetDeviceBuffer(),
epsilon,
PassThrough{},
y.GetDeviceBuffer(),
mean.GetDeviceBuffer(),
invVariance.GetDeviceBuffer(),
averageFactor,
nullptr,
nullptr);
auto invoker_ptr = op_ptr->MakeInvokerPointer();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
size_t workspace_sz = op_ptr->GetWorkSpaceSize(argument_ptr.get());
SimpleDeviceMem workspace(workspace_sz);
op_ptr->SetWorkSpacePointer(argument_ptr.get(), workspace.GetDeviceBuffer());
float ave_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true});
if(ave_time < best_ave_time)
{
found = true;
best_op_index = i;
best_ave_time = ave_time;
}
}
}
if(found)
{
instance_index = best_op_index;
instance_name = op_ptrs[instance_index]->GetTypeIdHashCode();
};
// simulate the execution of the operation when the instance index and name are available
const auto op_ptrs_2 = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
if(instance_index >= 0 && instance_index < op_ptrs_2.size())
{
auto& op_ptr = op_ptrs_2[instance_index];
if(op_ptr->GetTypeIdHashCode() == instance_name)
{
auto argument_ptr = op_ptr->MakeArgumentPointer(xyLengths,
xyStrides,
xyStrides,
reduceDims,
scaleBiasMeanVarLengths,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
scaleBiasMeanVarStrides,
x.GetDeviceBuffer(),
scale.GetDeviceBuffer(),
bias.GetDeviceBuffer(),
epsilon,
PassThrough{},
y.GetDeviceBuffer(),
mean.GetDeviceBuffer(),
invVariance.GetDeviceBuffer(),
averageFactor,
nullptr,
nullptr);
auto invoker_ptr = op_ptr->MakeInvokerPointer();
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
size_t workspace_sz = op_ptr->GetWorkSpaceSize(argument_ptr.get());
SimpleDeviceMem workspace(workspace_sz);
op_ptr->SetWorkSpacePointer(argument_ptr.get(), workspace.GetDeviceBuffer());
float exec_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, true});
size_t num_bytes = numXYElement * (sizeof(XDataType) + sizeof(YDataType)) +
numScaleBiasMeanVarElement *
(sizeof(ScaleDataType) + sizeof(BiasDataType) +
sizeof(MeanVarDataType) + sizeof(MeanVarDataType));
float gb_per_sec = num_bytes / 1.E6 / exec_time;
std::cout << "Kernel execution time: " << std::setw(10) << exec_time
<< " ms, effective data transfer bandwidth: " << gb_per_sec << " GB/s"
<< std::endl;
}
};
}
return 0;
}
cmake_minimum_required(VERSION 3.15)
project(ck_app)
add_compile_options(-std=c++17)
find_package(composable_kernel 1.0.0 COMPONENTS device_operations)
find_package(hip REQUIRED PATHS /opt/rocm)
message(STATUS "Build with HIP ${hip_VERSION}")
# add all example subdir
file(GLOB dir_list LIST_DIRECTORIES true *)
FOREACH(subdir ${dir_list})
IF(IS_DIRECTORY "${subdir}" AND (NOT "${subdir}" MATCHES "build"))
add_subdirectory(${subdir})
ENDIF()
ENDFOREACH()
##
Client application links to CK library, and therefore CK library needs to be installed before building client applications.
## Build
```bash
mkdir -p client_example/build
cd client_example/build
```
```bash
cmake \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_PREFIX_PATH="/opt/rocm;${PATH_TO_CK_INSTALL_DIRECTORY}" \
..
```
### Build client example
```bash
make -j
```
################################################################################
#
# MIT License
#
# Copyright (c) 2017 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################
if(NOT TARGET analyze)
add_custom_target(analyze)
endif()
function(mark_as_analyzer)
add_dependencies(analyze ${ARGN})
endfunction()
################################################################################
#
# MIT License
#
# Copyright (c) 2017 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################
include(CMakeParseArguments)
include(Analyzers)
get_filename_component(CLANG_TIDY_EXE_HINT "${CMAKE_CXX_COMPILER}" PATH)
find_program(CLANG_TIDY_EXE
NAMES
clang-tidy
clang-tidy-5.0
clang-tidy-4.0
clang-tidy-3.9
clang-tidy-3.8
clang-tidy-3.7
clang-tidy-3.6
clang-tidy-3.5
HINTS
${CLANG_TIDY_EXE_HINT}
PATH_SUFFIXES
compiler/bin
PATHS
/opt/rocm/llvm/bin
/opt/rocm/hcc
/usr/local/opt/llvm/bin
)
function(find_clang_tidy_version VAR)
execute_process(COMMAND ${CLANG_TIDY_EXE} -version OUTPUT_VARIABLE VERSION_OUTPUT)
separate_arguments(VERSION_OUTPUT_LIST UNIX_COMMAND "${VERSION_OUTPUT}")
list(FIND VERSION_OUTPUT_LIST "version" VERSION_INDEX)
if(VERSION_INDEX GREATER 0)
math(EXPR VERSION_INDEX "${VERSION_INDEX} + 1")
list(GET VERSION_OUTPUT_LIST ${VERSION_INDEX} VERSION)
set(${VAR} ${VERSION} PARENT_SCOPE)
else()
set(${VAR} "0.0" PARENT_SCOPE)
endif()
endfunction()
if( NOT CLANG_TIDY_EXE )
message( STATUS "Clang tidy not found" )
set(CLANG_TIDY_VERSION "0.0")
else()
find_clang_tidy_version(CLANG_TIDY_VERSION)
message( STATUS "Clang tidy found: ${CLANG_TIDY_VERSION}")
endif()
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CLANG_TIDY_FIXIT_DIR ${CMAKE_BINARY_DIR}/fixits)
file(MAKE_DIRECTORY ${CLANG_TIDY_FIXIT_DIR})
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${CLANG_TIDY_FIXIT_DIR})
macro(enable_clang_tidy)
set(options ANALYZE_TEMPORARY_DTORS ALL)
set(oneValueArgs HEADER_FILTER)
set(multiValueArgs CHECKS ERRORS EXTRA_ARGS)
cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
string(REPLACE ";" "," CLANG_TIDY_CHECKS "${PARSE_CHECKS}")
string(REPLACE ";" "," CLANG_TIDY_ERRORS "${PARSE_ERRORS}")
set(CLANG_TIDY_EXTRA_ARGS)
foreach(ARG ${PARSE_EXTRA_ARGS})
list(APPEND CLANG_TIDY_EXTRA_ARGS "-extra-arg=${ARG}")
endforeach()
set(CLANG_TIDY_ALL)
if(PARSE_ALL)
set(CLANG_TIDY_ALL ALL)
endif()
message(STATUS "Clang tidy checks: ${CLANG_TIDY_CHECKS}")
if (${PARSE_ANALYZE_TEMPORARY_DTORS})
set(CLANG_TIDY_ANALYZE_TEMPORARY_DTORS "-analyze-temporary-dtors")
endif()
if (${CLANG_TIDY_VERSION} VERSION_LESS "3.9.0")
set(CLANG_TIDY_ERRORS_ARG "")
else()
set(CLANG_TIDY_ERRORS_ARG "-warnings-as-errors='${CLANG_TIDY_ERRORS}'")
endif()
if (${CLANG_TIDY_VERSION} VERSION_LESS "3.9.0")
set(CLANG_TIDY_QUIET_ARG "")
else()
set(CLANG_TIDY_QUIET_ARG "-quiet")
endif()
if(PARSE_HEADER_FILTER)
string(REPLACE "$" "$$" CLANG_TIDY_HEADER_FILTER "${PARSE_HEADER_FILTER}")
else()
set(CLANG_TIDY_HEADER_FILTER ".*")
endif()
set(CLANG_TIDY_COMMAND
${CLANG_TIDY_EXE}
${CLANG_TIDY_QUIET_ARG}
-p ${CMAKE_BINARY_DIR}
-checks='${CLANG_TIDY_CHECKS}'
${CLANG_TIDY_ERRORS_ARG}
${CLANG_TIDY_EXTRA_ARGS}
${CLANG_TIDY_ANALYZE_TEMPORARY_DTORS}
-header-filter='${CLANG_TIDY_HEADER_FILTER}'
)
add_custom_target(tidy ${CLANG_TIDY_ALL})
mark_as_analyzer(tidy)
add_custom_target(tidy-base)
add_custom_target(tidy-make-fixit-dir COMMAND ${CMAKE_COMMAND} -E make_directory ${CLANG_TIDY_FIXIT_DIR})
add_custom_target(tidy-rm-fixit-dir COMMAND ${CMAKE_COMMAND} -E remove_directory ${CLANG_TIDY_FIXIT_DIR})
add_dependencies(tidy-make-fixit-dir tidy-rm-fixit-dir)
add_dependencies(tidy-base tidy-make-fixit-dir)
endmacro()
function(clang_tidy_check TARGET)
get_target_property(SOURCES ${TARGET} SOURCES)
# TODO: Use generator expressions instead
# COMMAND ${CLANG_TIDY_COMMAND} $<TARGET_PROPERTY:${TARGET},SOURCES>
# COMMAND ${CLANG_TIDY_COMMAND} $<JOIN:$<TARGET_PROPERTY:${TARGET},SOURCES>, >
foreach(SOURCE ${SOURCES})
if((NOT "${SOURCE}" MATCHES "(h|hpp|hxx)$") AND (NOT "${SOURCE}" MATCHES "TARGET_OBJECTS"))
string(MAKE_C_IDENTIFIER "${SOURCE}" tidy_file)
set(tidy_target tidy-target-${TARGET}-${tidy_file})
add_custom_target(${tidy_target}
# for some targets clang-tidy not able to get information from .clang-tidy
DEPENDS ${SOURCE}
COMMAND ${CLANG_TIDY_COMMAND} "-config=\{CheckOptions: \[\{key: bugprone-reserved-identifier.AllowedIdentifiers,value: __HIP_PLATFORM_HCC__\; __HIP_ROCclr__\}\]\}" ${SOURCE} "-export-fixes=${CLANG_TIDY_FIXIT_DIR}/${TARGET}-${tidy_file}.yaml"
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "clang-tidy: Running clang-tidy on target ${SOURCE}..."
)
add_dependencies(${tidy_target} ${TARGET})
add_dependencies(${tidy_target} tidy-base)
add_dependencies(tidy ${tidy_target})
endif()
endforeach()
endfunction()
################################################################################
#
# MIT License
#
# Copyright (c) 2017 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################
include(CMakeParseArguments)
include(ProcessorCount)
include(Analyzers)
find_program(CPPCHECK_EXE
NAMES
cppcheck
PATHS
/opt/rocm/bin
)
ProcessorCount(CPPCHECK_JOBS)
set(CPPCHECK_BUILD_DIR ${CMAKE_BINARY_DIR}/cppcheck-build)
file(MAKE_DIRECTORY ${CPPCHECK_BUILD_DIR})
set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES ${CPPCHECK_BUILD_DIR})
macro(enable_cppcheck)
set(options FORCE)
set(oneValueArgs)
set(multiValueArgs CHECKS SUPPRESS DEFINE UNDEFINE INCLUDE SOURCES)
cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
string(REPLACE ";" "," CPPCHECK_CHECKS "${PARSE_CHECKS}")
string(REPLACE ";" "\n" CPPCHECK_SUPPRESS "${PARSE_SUPPRESS};*:/usr/*")
file(WRITE ${CMAKE_BINARY_DIR}/cppcheck-supressions "${CPPCHECK_SUPPRESS}")
set(CPPCHECK_DEFINES)
foreach(DEF ${PARSE_DEFINE})
set(CPPCHECK_DEFINES "${CPPCHECK_DEFINES} -D${DEF}")
endforeach()
set(CPPCHECK_UNDEFINES)
foreach(DEF ${PARSE_UNDEFINE})
set(CPPCHECK_UNDEFINES "${CPPCHECK_UNDEFINES} -U${DEF}")
endforeach()
set(CPPCHECK_INCLUDES)
foreach(INC ${PARSE_INCLUDE})
set(CPPCHECK_INCLUDES "${CPPCHECK_INCLUDES} -I${INC}")
endforeach()
# set(CPPCHECK_FORCE)
set(CPPCHECK_FORCE "--project=${CMAKE_BINARY_DIR}/compile_commands.json")
if(PARSE_FORCE)
set(CPPCHECK_FORCE --force)
endif()
set(SOURCES)
set(GLOBS)
foreach(SOURCE ${PARSE_SOURCES})
get_filename_component(ABS_SOURCE ${SOURCE} ABSOLUTE)
if(EXISTS ${ABS_SOURCE})
if(IS_DIRECTORY ${ABS_SOURCE})
set(GLOBS "${GLOBS} ${ABS_SOURCE}/*.cpp ${ABS_SOURCE}/*.hpp ${ABS_SOURCE}/*.cxx ${ABS_SOURCE}/*.c ${ABS_SOURCE}/*.h")
else()
set(SOURCES "${SOURCES} ${ABS_SOURCE}")
endif()
else()
set(GLOBS "${GLOBS} ${ABS_SOURCE}")
endif()
endforeach()
file(WRITE ${CMAKE_BINARY_DIR}/cppcheck.cmake "
file(GLOB_RECURSE GSRCS ${GLOBS})
set(CPPCHECK_COMMAND
${CPPCHECK_EXE}
-q
# -v
# --report-progress
${CPPCHECK_FORCE}
--cppcheck-build-dir=${CPPCHECK_BUILD_DIR}
--platform=native
--template=gcc
--error-exitcode=1
-j ${CPPCHECK_JOBS}
${CPPCHECK_DEFINES}
${CPPCHECK_UNDEFINES}
${CPPCHECK_INCLUDES}
--enable=${CPPCHECK_CHECKS}
--inline-suppr
--suppressions-list=${CMAKE_BINARY_DIR}/cppcheck-supressions
${SOURCES} \${GSRCS}
)
string(REPLACE \";\" \" \" CPPCHECK_SHOW_COMMAND \"\${CPPCHECK_COMMAND}\")
message(\"\${CPPCHECK_SHOW_COMMAND}\")
execute_process(
COMMAND \${CPPCHECK_COMMAND}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE RESULT
)
if(NOT RESULT EQUAL 0)
message(FATAL_ERROR \"Cppcheck failed\")
endif()
")
add_custom_target(cppcheck
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_BINARY_DIR}/cppcheck.cmake
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "cppcheck: Running cppcheck..."
)
mark_as_analyzer(cppcheck)
endmacro()
################################################################################
#
# MIT License
#
# Copyright (c) 2017 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################
include(CMakeParseArguments)
include(MainDoc)
find_program(DOXYGEN_EXECUTABLE NAMES doxygen
PATH_SUFFIXES bin
DOC "Doxygen documentation generator"
)
mark_as_advanced(DOXYGEN_EXECUTABLE)
find_path(DOT_EXECUTABLE NAMES dot
PATH_SUFFIXES bin
DOC "Graphviz"
)
mark_as_advanced(DOT_EXECUTABLE)
set(DOXYGEN_ARGS
ABBREVIATE_BRIEF
ALIASES
ALLEXTERNALS
ALLOW_UNICODE_NAMES
ALPHABETICAL_INDEX
ALWAYS_DETAILED_SEC
AUTOLINK_SUPPORT
BINARY_TOC
BRIEF_MEMBER_DESC
BUILTIN_STL_SUPPORT
CALLER_GRAPH
CALL_GRAPH
CASE_SENSE_NAMES
CHM_FILE
CHM_INDEX_ENCODING
CITE_BIB_FILES
CLANG_ASSISTED_PARSING
CLANG_OPTIONS
CLASS_DIAGRAMS
CLASS_GRAPH
COLLABORATION_GRAPH
COLS_IN_ALPHA_INDEX
COMPACT_LATEX
COMPACT_RTF
CPP_CLI_SUPPORT
CREATE_SUBDIRS
DIAFILE_DIRS
DIA_PATH
DIRECTORY_GRAPH
DISABLE_INDEX
DISTRIBUTE_GROUP_DOC
DOCBOOK_OUTPUT
DOCBOOK_PROGRAMLISTING
DOCSET_BUNDLE_ID
DOCSET_FEEDNAME
DOCSET_PUBLISHER_ID
DOCSET_PUBLISHER_NAME
DOTFILE_DIRS
DOT_CLEANUP
DOT_FONTNAME
DOT_FONTPATH
DOT_FONTSIZE
DOT_GRAPH_MAX_NODES
DOT_IMAGE_FORMAT
DOT_MULTI_TARGETS
DOT_NUM_THREADS
# DOT_PATH
DOT_TRANSPARENT
DOXYFILE_ENCODING
ECLIPSE_DOC_ID
ENABLED_SECTIONS
ENABLE_PREPROCESSING
ENUM_VALUES_PER_LINE
EXAMPLE_PATH
EXAMPLE_PATTERNS
EXAMPLE_RECURSIVE
EXCLUDE
EXCLUDE_PATTERNS
EXCLUDE_SYMBOLS
EXCLUDE_SYMLINKS
EXPAND_AS_DEFINED
EXPAND_ONLY_PREDEF
EXTENSION_MAPPING
EXTERNAL_GROUPS
EXTERNAL_PAGES
EXTERNAL_SEARCH
EXTERNAL_SEARCH_ID
EXTRACT_ALL
EXTRACT_ANON_NSPACES
EXTRACT_LOCAL_CLASSES
EXTRACT_LOCAL_METHODS
EXTRACT_PACKAGE
EXTRACT_PRIVATE
EXTRACT_STATIC
EXTRA_PACKAGES
EXTRA_SEARCH_MAPPINGS
EXT_LINKS_IN_WINDOW
FILE_PATTERNS
FILE_VERSION_FILTER
FILTER_PATTERNS
FILTER_SOURCE_FILES
FILTER_SOURCE_PATTERNS
FORCE_LOCAL_INCLUDES
FORMULA_FONTSIZE
FORMULA_TRANSPARENT
FULL_PATH_NAMES
GENERATE_AUTOGEN_DEF
GENERATE_BUGLIST
GENERATE_CHI
GENERATE_DEPRECATEDLIST
GENERATE_DOCBOOK
GENERATE_DOCSET
GENERATE_ECLIPSEHELP
GENERATE_HTML
GENERATE_HTMLHELP
GENERATE_LATEX
GENERATE_LEGEND
GENERATE_MAN
GENERATE_PERLMOD
GENERATE_QHP
GENERATE_RTF
GENERATE_TAGFILE
GENERATE_TESTLIST
GENERATE_TODOLIST
GENERATE_TREEVIEW
GENERATE_XML
GRAPHICAL_HIERARCHY
GROUP_GRAPHS
GROUP_NESTED_COMPOUNDS
# HAVE_DOT
HHC_LOCATION
HIDE_COMPOUND_REFERENCE
HIDE_FRIEND_COMPOUNDS
HIDE_IN_BODY_DOCS
HIDE_SCOPE_NAMES
HIDE_UNDOC_CLASSES
HIDE_UNDOC_MEMBERS
HIDE_UNDOC_RELATIONS
HTML_COLORSTYLE_GAMMA
HTML_COLORSTYLE_HUE
HTML_COLORSTYLE_SAT
HTML_DYNAMIC_SECTIONS
HTML_EXTRA_FILES
HTML_EXTRA_STYLESHEET
HTML_FILE_EXTENSION
HTML_FOOTER
HTML_HEADER
HTML_INDEX_NUM_ENTRIES
HTML_OUTPUT
HTML_STYLESHEET
HTML_TIMESTAMP
IDL_PROPERTY_SUPPORT
IGNORE_PREFIX
IMAGE_PATH
INCLUDED_BY_GRAPH
INCLUDE_FILE_PATTERNS
INCLUDE_GRAPH
INCLUDE_PATH
INHERIT_DOCS
INLINE_GROUPED_CLASSES
INLINE_INFO
INLINE_INHERITED_MEMB
INLINE_SIMPLE_STRUCTS
INLINE_SOURCES
INPUT
INPUT_ENCODING
INPUT_FILTER
INTERACTIVE_SVG
INTERNAL_DOCS
JAVADOC_AUTOBRIEF
LATEX_BATCHMODE
LATEX_BIB_STYLE
LATEX_CMD_NAME
LATEX_EXTRA_FILES
LATEX_EXTRA_STYLESHEET
LATEX_FOOTER
LATEX_HEADER
LATEX_HIDE_INDICES
LATEX_OUTPUT
LATEX_SOURCE_CODE
LATEX_TIMESTAMP
LAYOUT_FILE
LOOKUP_CACHE_SIZE
MACRO_EXPANSION
MAKEINDEX_CMD_NAME
MAN_EXTENSION
MAN_LINKS
MAN_OUTPUT
MAN_SUBDIR
MARKDOWN_SUPPORT
MATHJAX_CODEFILE
MATHJAX_EXTENSIONS
MATHJAX_FORMAT
MATHJAX_RELPATH
MAX_DOT_GRAPH_DEPTH
MAX_INITIALIZER_LINES
MSCFILE_DIRS
MSCGEN_PATH
MULTILINE_CPP_IS_BRIEF
OPTIMIZE_FOR_FORTRAN
OPTIMIZE_OUTPUT_FOR_C
OPTIMIZE_OUTPUT_JAVA
OPTIMIZE_OUTPUT_VHDL
OUTPUT_DIRECTORY
OUTPUT_LANGUAGE
PAPER_TYPE
PDF_HYPERLINKS
PERLMOD_LATEX
PERLMOD_MAKEVAR_PREFIX
PERLMOD_PRETTY
PERL_PATH
PLANTUML_CFG_FILE
PLANTUML_INCLUDE_PATH
PLANTUML_JAR_PATH
PREDEFINED
PROJECT_BRIEF
PROJECT_LOGO
PROJECT_NAME
PROJECT_NUMBER
QCH_FILE
QHG_LOCATION
QHP_CUST_FILTER_ATTRS
QHP_CUST_FILTER_NAME
QHP_NAMESPACE
QHP_SECT_FILTER_ATTRS
QHP_VIRTUAL_FOLDER
QT_AUTOBRIEF
QUIET
RECURSIVE
REFERENCED_BY_RELATION
REFERENCES_LINK_SOURCE
REFERENCES_RELATION
REPEAT_BRIEF
RTF_EXTENSIONS_FILE
RTF_HYPERLINKS
RTF_OUTPUT
RTF_SOURCE_CODE
RTF_STYLESHEET_FILE
SEARCHDATA_FILE
SEARCHENGINE
SEARCHENGINE_URL
SEARCH_INCLUDES
SEPARATE_MEMBER_PAGES
SERVER_BASED_SEARCH
SHORT_NAMES
SHOW_FILES
SHOW_GROUPED_MEMB_INC
SHOW_INCLUDE_FILES
SHOW_NAMESPACES
SHOW_USED_FILES
SIP_SUPPORT
SKIP_FUNCTION_MACROS
SORT_BRIEF_DOCS
SORT_BY_SCOPE_NAME
SORT_GROUP_NAMES
SORT_MEMBERS_CTORS_1ST
SORT_MEMBER_DOCS
SOURCE_BROWSER
SOURCE_TOOLTIPS
STRICT_PROTO_MATCHING
STRIP_CODE_COMMENTS
STRIP_FROM_INC_PATH
STRIP_FROM_PATH
SUBGROUPING
TAB_SIZE
TAGFILES
TCL_SUBST
TEMPLATE_RELATIONS
TOC_EXPAND
TOC_INCLUDE_HEADINGS
TREEVIEW_WIDTH
TYPEDEF_HIDES_STRUCT
UML_LIMIT_NUM_FIELDS
UML_LOOK
USE_HTAGS
USE_MATHJAX
USE_MDFILE_AS_MAINPAGE
USE_PDFLATEX
VERBATIM_HEADERS
WARNINGS
WARN_AS_ERROR
WARN_FORMAT
WARN_IF_DOC_ERROR
WARN_IF_UNDOCUMENTED
WARN_LOGFILE
WARN_NO_PARAMDOC
XML_OUTPUT
XML_PROGRAMLISTING
)
set(DOXYGEN_CONFIG_FILE "${CMAKE_CURRENT_BINARY_DIR}/doxygen/doxygen.conf" CACHE PATH "Path to generated doxygen configuration file")
function(add_doxygen_doc)
set(options)
set(oneValueArgs)
set(multiValueArgs DEPENDS ${DOXYGEN_ARGS})
cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
file(WRITE ${DOXYGEN_CONFIG_FILE} "# Auto-generated doxygen configuration file\n")
foreach(ARG ${DOXYGEN_ARGS})
if(PARSE_${ARG})
string(REPLACE ";" " " ARG_VALUE ${PARSE_${ARG}})
file(APPEND ${DOXYGEN_CONFIG_FILE} "\n${ARG} = ${ARG_VALUE}\n")
endif()
endforeach()
if(PARSE_OUTPUT_DIRECTORY)
if(NOT EXISTS ${PARSE_OUTPUT_DIRECTORY})
file(MAKE_DIRECTORY ${PARSE_OUTPUT_DIRECTORY})
endif()
endif()
if(DOT_EXECUTABLE)
file(APPEND ${DOXYGEN_CONFIG_FILE} "\nDOT_PATH = \"${DOT_EXECUTABLE}\"\n")
file(APPEND ${DOXYGEN_CONFIG_FILE} "\nHAVE_DOT = YES\n")
else()
file(APPEND ${DOXYGEN_CONFIG_FILE} "\nHAVE_DOT = NO\n")
endif()
add_custom_target(doxygen
${DOXYGEN_EXECUTABLE} ${DOXYGEN_CONFIG_FILE}
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMENT "Building documentation with doxygen"
)
if(PARSE_OUTPUT_DIRECTORY)
clean_doc_output(${PARSE_OUTPUT_DIRECTORY})
endif()
mark_as_doc(doxygen)
if(PARSE_DEPENDS)
add_dependencies(doxygen ${PARSE_DEPENDS})
endif()
endfunction()
################################################################################
#
# MIT License
#
# Copyright (c) 2017 Advanced Micro Devices, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
################################################################################
# - Enable warning all for gcc/clang or use /W4 for visual studio
## Strict warning level
if (MSVC)
# Use the highest warning level for visual studio.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /w")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /w")
# set(CMAKE_CXX_WARNING_LEVEL 4)
# if (CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
# string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
# else ()
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
# endif ()
# set(CMAKE_C_WARNING_LEVEL 4)
# if (CMAKE_C_FLAGS MATCHES "/W[0-4]")
# string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
# else ()
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /W4")
# endif ()
else()
foreach(COMPILER C CXX)
set(CMAKE_COMPILER_WARNINGS)
# use -Wall for gcc and clang
list(APPEND CMAKE_COMPILER_WARNINGS
-Wall
-Wextra
-Wcomment
-Wendif-labels
-Wformat
-Winit-self
-Wreturn-type
-Wsequence-point
# Shadow is broken on gcc when using lambdas
# -Wshadow
-Wswitch
-Wtrigraphs
-Wundef
-Wuninitialized
-Wunreachable-code
-Wunused
-Wsign-compare
-Wno-extra-semi-stmt
)
if (CMAKE_${COMPILER}_COMPILER_ID MATCHES "Clang")
list(APPEND CMAKE_COMPILER_WARNINGS
-Weverything
-Wno-c++98-compat
-Wno-c++98-compat-pedantic
-Wno-conversion
-Wno-double-promotion
-Wno-exit-time-destructors
-Wno-extra-semi
-Wno-float-conversion
-Wno-gnu-anonymous-struct
-Wno-gnu-zero-variadic-macro-arguments
-Wno-missing-prototypes
-Wno-nested-anon-types
-Wno-padded
-Wno-return-std-move-in-c++11
-Wno-shorten-64-to-32
-Wno-sign-conversion
-Wno-unknown-warning-option
-Wno-unused-command-line-argument
-Wno-weak-vtables
-Wno-covered-switch-default
)
else()
if (CMAKE_${COMPILER}_COMPILER_ID MATCHES "GNU" AND ${COMPILER} MATCHES "CXX")
# cmake 3.5.2 does not support >=.
if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS "6.1")
list(APPEND CMAKE_COMPILER_WARNINGS
-Wno-ignored-attributes)
endif()
endif()
list(APPEND CMAKE_COMPILER_WARNINGS
-Wno-missing-field-initializers
-Wno-deprecated-declarations
)
endif()
add_definitions(${CMAKE_COMPILER_WARNINGS})
endforeach()
endif ()
function(get_target_property2 VAR TARGET PROPERTY)
get_target_property(_pflags ${TARGET} ${PROPERTY})
if(_pflags)
set(${VAR} ${_pflags} PARENT_SCOPE)
else()
set(${VAR} "" PARENT_SCOPE)
endif()
endfunction()
macro(append_flags FLAGS TARGET PROPERTY PREFIX)
get_target_property2(_pflags ${TARGET} ${PROPERTY})
foreach(FLAG ${_pflags})
if(TARGET ${FLAG})
target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${_pflags2}")
else()
string(APPEND ${FLAGS} " ${PREFIX}${FLAG}")
endif()
endforeach()
endmacro()
macro(append_link_flags FLAGS TARGET PROPERTY)
get_target_property2(_pflags ${TARGET} ${PROPERTY})
foreach(FLAG ${_pflags})
if(TARGET ${FLAG})
target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${_pflags2}")
elseif(FLAG MATCHES "^-.*")
string(APPEND ${FLAGS} " ${FLAG}")
elseif(EXISTS ${FLAG})
string(APPEND ${FLAGS} " ${FLAG}")
else()
string(APPEND ${FLAGS} " -l${FLAG}")
endif()
endforeach()
endmacro()
function(target_flags FLAGS TARGET)
set(_flags)
append_flags(_flags ${TARGET} "INTERFACE_COMPILE_OPTIONS" "")
append_flags(_flags ${TARGET} "INTERFACE_COMPILE_DEFINITIONS" "-D")
append_flags(_flags ${TARGET} "INTERFACE_INCLUDE_DIRECTORIES" "-isystem ")
append_flags(_flags ${TARGET} "INTERFACE_LINK_DIRECTORIES" "-L ")
append_flags(_flags ${TARGET} "INTERFACE_LINK_OPTIONS" "")
append_link_flags(_flags ${TARGET} "INTERFACE_LINK_LIBRARIES" "")
# message("_flags: ${_flags}")
set(${FLAGS} ${_flags} PARENT_SCOPE)
endfunction()
include(FetchContent)
set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")
if(GOOGLETEST_DIR)
set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
endif()
message(STATUS "Fetching GoogleTest")
list(APPEND GTEST_CMAKE_CXX_FLAGS
-Wno-undef
-Wno-reserved-identifier
-Wno-global-constructors
-Wno-missing-noreturn
-Wno-disabled-macro-expansion
-Wno-used-but-marked-unused
-Wno-switch-enum
-Wno-zero-as-null-pointer-constant
-Wno-unused-member-function
-Wno-comma
-Wno-old-style-cast
-Wno-deprecated
)
message(STATUS "Suppressing googltest warnings with flags: ${GTEST_CMAKE_CXX_FLAGS}")
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG b85864c64758dec007208e56af933fc3f52044ee
)
# Will be necessary for windows build
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
FetchContent_Populate(googletest)
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
target_compile_options(gtest PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gtest_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gmock PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gmock_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
set_target_properties(gtest PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(gtest_main PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(gmock PROPERTIES POSITION_INDEPENDENT_CODE ON)
set_target_properties(gmock_main PROPERTIES POSITION_INDEPENDENT_CODE ON)
## CK docker hub
[Docker hub](https://hub.docker.com/r/rocm/composable_kernel)
## Why do I need this?
To make our lives easier and bring Composable Kernel dependencies together, we recommend using docker images.
## So what is Composable Kernel?
Composable Kernel (CK) library aims to provide a programming model for writing performance critical kernels for machine learning workloads across multiple architectures including GPUs, CPUs, etc, through general purpose kernel languages, like HIP C++.
To get the CK library
```
git clone https://github.com/ROCmSoftwarePlatform/composable_kernel.git
```
run a docker container
```
docker run \
-it \
--privileged \
--group-add sudo \
-w /root/workspace \
-v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \
rocm/composable_kernel:ck_ub20.04_rocm5.3_release \
/bin/bash
```
and build the CK
```
mkdir build && cd build
# Need to specify target ID, example below is for gfx908 and gfx90a
cmake \
-D CMAKE_PREFIX_PATH=/opt/rocm \
-D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc \
-D CMAKE_CXX_FLAGS="-O3" \
-D CMAKE_BUILD_TYPE=Release \
-D GPU_TARGETS="gfx908;gfx90a" \
..
```
and
```
make -j examples tests
```
To run all the test cases including tests and examples run
```
make test
```
We can also run specific examples or tests like
```
./bin/example_gemm_xdl_fp16
./bin/test_gemm_fp16
```
For more details visit [CK github repo](https://github.com/ROCmSoftwarePlatform/composable_kernel), [CK examples](https://github.com/ROCmSoftwarePlatform/composable_kernel/tree/develop/example), [even more CK examples](https://github.com/ROCmSoftwarePlatform/composable_kernel/tree/develop/client_example).
## And what is inside?
The docker images have everything you need for running CK including:
* [ROCm](https://www.amd.com/en/graphics/servers-solutions-rocm)
* [CMake](https://cmake.org/)
* [Compiler](https://github.com/RadeonOpenCompute/llvm-project)
## Which image is right for me?
Let's take a look at the image naming, for example "ck_ub20.04_rocm5.4_release". The image specs are:
* "ck" - made for running Composable Kernel
* "ub20.04" - based on Ubuntu 20.04
* "rocm5.4" - ROCm platform version 5.4
* "release" - compiler version is release
So just pick the right image for your project dependencies and you're all set.
## DIY starts here
If you need to customize a docker image or just can't stop tinkering, feel free to adjust the [Dockerfile](https://github.com/ROCmSoftwarePlatform/composable_kernel/blob/develop/Dockerfile) for your needs.
## License
CK is released under the MIT [license](https://github.com/ROCmSoftwarePlatform/composable_kernel/blob/develop/LICENSE).
add_custom_target(example_gemm_dl)
add_example_executable(example_gemm_dl_fp32 gemm_dl_fp32.cpp)
add_example_executable(example_gemm_dl_fp16 gemm_dl_fp16.cpp)
add_example_executable(example_gemm_dl_int8 gemm_dl_int8.cpp)
add_dependencies(example_gemm_dl example_gemm_dl_fp32)
add_dependencies(example_gemm_dl example_gemm_dl_fp16)
add_dependencies(example_gemm_dl example_gemm_dl_int8)
if(USE_BITINT_EXTENSION_INT4)
add_example_executable(example_gemm_dl_int4 gemm_dl_int4.cpp)
add_dependencies(example_gemm_dl example_gemm_dl_int4)
endif(USE_BITINT_EXTENSION_INT4)
add_custom_target(example_gemm_xdl)
add_example_executable(example_gemm_xdl_fp16 gemm_xdl_fp16.cpp)
add_example_executable(example_gemm_xdl_bf16 gemm_xdl_bf16.cpp)
add_example_executable(example_gemm_xdl_int8 gemm_xdl_int8.cpp)
add_dependencies(example_gemm_xdl example_gemm_xdl_fp16)
add_dependencies(example_gemm_xdl example_gemm_xdl_bf16)
add_dependencies(example_gemm_xdl example_gemm_xdl_int8)
if(USE_BITINT_EXTENSION_INT4)
add_example_executable(example_gemm_xdl_int4 gemm_xdl_int4.cpp)
add_dependencies(example_gemm_xdl example_gemm_xdl_int4)
endif(USE_BITINT_EXTENSION_INT4)
add_example_executable(example_gemm_xdl_skip_b_lds_fp16 gemm_xdl_skip_b_lds_fp16.cpp)
# FIXME: re-enable this exampe as test when SWDEV-335738 is fixed
add_example_executable_no_testing(example_gemm_xdl_fp64 gemm_xdl_fp64.cpp)
add_dependencies(example_gemm_xdl example_gemm_xdl_skip_b_lds_fp16)
add_dependencies(example_gemm_xdl example_gemm_xdl_fp64)
# Instructions for ```example_gemm_xdl```
## Run ```example_gemm_xdl```
```bash
#arg1: verification (0=no, 1=yes)
#arg2: initialization (0=no init, 1=integer value, 2=decimal value)
#arg3: run kernel # of times (>1)
./bin/example_gemm_xdl 0 1 5
```
Result (MI100 @ 1087Mhz, 133.5TFlops peak FP16)
```
a_m_k: dim 2, lengths {3840, 4096}, strides {4096, 1}
b_k_n: dim 2, lengths {4096, 4096}, strides {1, 4096}
c_m_n: dim 2, lengths {3840, 4096}, strides {4096, 1}
arg.a_grid_desc_k0_m_k1_{512, 3840, 8}
arg.b_grid_desc_k0_n_k1_{512, 4096, 8}
arg.c_grid_desc_m_n_{ 3840, 4096}
launch_and_time_kernel: grid_dim {480, 1, 1}, block_dim {256, 1, 1}
Warm up
Start running 5 times...
Perf: 1.19685 ms, 107.657 TFlops, 78.8501 GB/s
```
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <numeric>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/utility/data_type.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/fill.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/literals.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
struct ProblemSize final
{
ck::index_t M = 3840;
ck::index_t N = 4096;
ck::index_t K = 4096;
ck::index_t StrideA = 4096;
ck::index_t StrideB = 4096;
ck::index_t StrideC = 4096;
};
struct ExecutionConfig final
{
bool do_verification = true;
int init_method = 1;
bool time_kernel = false;
};
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
inline bool
parse_cmd_args(int argc, char* argv[], ProblemSize& problem_size, ExecutionConfig& config)
{
if(argc == 1)
{
// use default case
}
else if(argc == 4)
{
config.do_verification = std::stoi(argv[1]);
config.init_method = std::stoi(argv[2]);
config.time_kernel = std::stoi(argv[3]);
}
else if(argc == 10)
{
config.do_verification = std::stoi(argv[1]);
config.init_method = std::stoi(argv[2]);
config.time_kernel = std::stoi(argv[3]);
problem_size.M = std::stoi(argv[4]);
problem_size.N = std::stoi(argv[5]);
problem_size.K = std::stoi(argv[6]);
problem_size.StrideA = std::stoi(argv[7]);
problem_size.StrideB = std::stoi(argv[8]);
problem_size.StrideC = std::stoi(argv[9]);
}
else
{
std::cerr << "arg1: verification (0=no, 1=yes)" << std::endl
<< "arg2: initialization (0=no init, 1=integer value, 2=decimal value)"
<< std::endl
<< "arg3: time kernel (0=no, 1=yes)" << std::endl
<< "arg4 to 9: M (256x), N(128x), K(32x), StrideA, StrideB, StrideC" << std::endl;
return false;
}
return true;
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "common.hpp"
#include "ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp"
using ADataType = ck::half_t;
using BDataType = ck::half_t;
using CDataType = ck::half_t;
using AccDataType = float;
using ALayout = Col;
using BLayout = Row;
using CLayout = Row;
using AElementOp = PassThrough;
using BElementOp = PassThrough;
using CElementOp = PassThrough;
static constexpr auto GemmDefault = ck::tensor_operation::device::GemmSpecialization::Default;
// clang-format off
using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmDl
// ######| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| M1Per| N1Per| KPer| M11N11Thread| M11N11Thread| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| CThreadTransfer| CThreadTransfer| CThreadTransfer|
// ######| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise| Spacialization| Size| Block| Block| Block| | ThreadM111| ThreadN111| Thread| ClusterM110Xs| ClusterN110Xs| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| ThreadSliceLengths| ThreadClusterLengths| ThreadCluster| SrcAccess| SrcVectorTensor| SrcVectorTensor| DstVectorTensor| SrcDstAccess| SrcDstVectorDim| DstScalarPerVector|
// ######| | | | | | | | Operation| Operation| Operation| | | | | | | | | | | | K0_M0_M1_K1| K0_M0_M1_K1| ArrangeOrder| Order| Lengths_K0_M0_M1_K1| ContiguousDimOrder| Lengths_K0_M0_M1_K1| K0_N0_N1_K1| K0_N0_N1_K1| ArrangeOrder| Order| Lengths_K0_N0_N1_K1| ContiguousDimOrder| Lengths_K0_N0_N1_K1| Order| | |
// ######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
< ADataType, BDataType, CDataType, AccDataType, ALayout, BLayout, CLayout, AElementOp, BElementOp, CElementOp, GemmDefault, 256, 128, 128, 16, 2, 4, 4, 1, S<8, 2>, S<8, 2>, S<2, 1, 4, 2>, S<8, 1, 32, 1>, S<0, 3, 1, 2>, S<0, 3, 1, 2>, S<1, 1, 4, 1>, S<0, 3, 1, 2>, S<1, 1, 4, 2>, S<2, 1, 4, 2>, S<8, 1, 32, 1>, S<0, 3, 1, 2>, S<0, 3, 1, 2>, S<1, 1, 4, 1>, S<0, 3, 1, 2>, S<1, 1, 4, 2>, S<0, 1, 2, 3, 4, 5>, 5, 4>;
// clang-format on
using ReferenceGemmInstance = ck::tensor_operation::host::
ReferenceGemm<ADataType, BDataType, CDataType, AccDataType, AElementOp, BElementOp, CElementOp>;
#include "run_gemm_example.inc"
int main(int argc, char* argv[]) { return !run_gemm_example(argc, argv); }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment