Commit 6e4a1075 authored by Paul's avatar Paul
Browse files

Format

parent 94bfa502
find_program(EMBED_LD ld)
find_program(EMBED_OBJCOPY objcopy)
option(EMBED_USE_LD "Use ld to embed data files" ON)
function(wrap_string)
set(options)
set(oneValueArgs VARIABLE AT_COLUMN)
set(multiValueArgs)
cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
cmake_parse_arguments(WRAP_STRING "${options}" "${oneValueArgs}" "" ${ARGN})
string(LENGTH ${${PARSE_VARIABLE}} string_length)
math(EXPR offset "0")
while(string_length GREATER 0)
if(string_length GREATER ${PARSE_AT_COLUMN})
math(EXPR length "${PARSE_AT_COLUMN}")
else()
math(EXPR length "${string_length}")
endif()
string(SUBSTRING ${${PARSE_VARIABLE}} ${offset} ${length} line)
set(lines "${lines}\n${line}")
math(EXPR string_length "${string_length} - ${length}")
math(EXPR offset "${offset} + ${length}")
endwhile()
set(${PARSE_VARIABLE} "${lines}" PARENT_SCOPE)
endfunction()
function(generate_embed_source EMBED_NAME)
set(options)
set(oneValueArgs SRC HEADER RELATIVE)
set(multiValueArgs OBJECTS SYMBOLS FILES)
cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(EXTERNS)
set(INIT_KERNELS)
list(LENGTH PARSE_SYMBOLS SYMBOLS_LEN)
list(LENGTH PARSE_OBJECTS OBJECTS_LEN)
if(NOT ${SYMBOLS_LEN} EQUAL ${OBJECTS_LEN})
message(FATAL_ERROR "Symbols and objects dont match: ${SYMBOLS_LEN} != ${OBJECTS_LEN}")
endif()
math(EXPR LEN "${SYMBOLS_LEN} - 1")
foreach(idx RANGE ${LEN})
list(GET PARSE_SYMBOLS ${idx} SYMBOL)
list(GET PARSE_OBJECTS ${idx} OBJECT)
list(GET PARSE_FILES ${idx} FILE)
set(START_SYMBOL "_binary_${SYMBOL}_start")
set(END_SYMBOL "_binary_${SYMBOL}_end")
if(EMBED_USE_LD)
string(APPEND EXTERNS "
extern const char ${START_SYMBOL}[];
extern const char ${END_SYMBOL}[];
")
else()
string(APPEND EXTERNS "
extern const char ${START_SYMBOL}[];
extern const char* ${END_SYMBOL};
")
endif()
if(PARSE_RELATIVE)
file(RELATIVE_PATH BASE_NAME ${PARSE_RELATIVE} "${FILE}")
else()
get_filename_component(BASE_NAME "${FILE}" NAME)
endif()
string(APPEND INIT_KERNELS "
{ \"${BASE_NAME}\", { ${START_SYMBOL}, ${END_SYMBOL}} },
")
endforeach()
file(WRITE "${PARSE_HEADER}" "
#include <unordered_map>
#include <string>
#include <utility>
const std::unordered_map<std::string, std::pair<const char*,const char*>>& ${EMBED_NAME}();
")
file(WRITE "${PARSE_SRC}" "
#include <${EMBED_NAME}.hpp>
${EXTERNS}
const std::unordered_map<std::string, std::pair<const char*,const char*>>& ${EMBED_NAME}()
{
static const std::unordered_map<std::string, std::pair<const char*,const char*>> result = {${INIT_KERNELS}};
return result;
}
")
endfunction()
function(embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE)
set(WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
# Glob is used to compute the relative path
file(GLOB FILES RELATIVE ${WORKING_DIRECTORY} ${FILE})
foreach(REL_FILE ${FILES})
string(MAKE_C_IDENTIFIER "${REL_FILE}" SYMBOL)
get_filename_component(OUTPUT_FILE_DIR "${REL_FILE}" DIRECTORY)
file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/embed")
if(EMBED_USE_LD)
set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/embed/${SYMBOL}.o")
else()
set(OUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/embed/${SYMBOL}.cpp")
endif()
set(${OUTPUT_SYMBOL} ${SYMBOL} PARENT_SCOPE)
set(${OUTPUT_FILE} "${OUT_FILE}" PARENT_SCOPE)
if(EMBED_USE_LD)
add_custom_command(
OUTPUT "${OUT_FILE}"
COMMAND ${EMBED_LD} -r -o "${OUT_FILE}" -z noexecstack --format=binary "${REL_FILE}"
COMMAND ${EMBED_OBJCOPY} --rename-section .data=.rodata,alloc,load,readonly,data,contents "${OUT_FILE}"
WORKING_DIRECTORY ${WORKING_DIRECTORY}
DEPENDS ${FILE}
VERBATIM
)
else()
set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS ${FILE})
# reads source file contents as hex string
file(READ ${FILE} HEX_STRING HEX)
# wraps the hex string into multiple lines
wrap_string(VARIABLE HEX_STRING AT_COLUMN 80)
# adds '0x' prefix and comma suffix before and after every byte respectively
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1, " ARRAY_VALUES ${HEX_STRING})
# removes trailing comma
string(REGEX REPLACE ", $" "" ARRAY_VALUES ${ARRAY_VALUES})
file(WRITE "${OUT_FILE}" "
extern const char _binary_${SYMBOL}_start[] = { ${ARRAY_VALUES} };
extern const char* _binary_${SYMBOL}_end = _binary_${SYMBOL}_start + sizeof(_binary_${SYMBOL}_start);
\n")
endif()
endforeach()
endfunction()
function(add_embed_library EMBED_NAME)
set(options)
set(oneValueArgs RELATIVE)
set(multiValueArgs)
cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/embed)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/embed/${EMBED_NAME})
set(EMBED_DIR ${CMAKE_CURRENT_BINARY_DIR}/embed/${EMBED_NAME})
set(SRC_FILE "${EMBED_DIR}/${EMBED_NAME}.cpp")
set(HEADER_FILE "${EMBED_DIR}/include/${EMBED_NAME}.hpp")
set(WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
set(OUTPUT_FILES)
set(SYMBOLS)
message(STATUS "Embedding files")
foreach(FILE ${PARSE_UNPARSED_ARGUMENTS})
embed_file(OUTPUT_FILE OUTPUT_SYMBOL ${FILE})
list(APPEND OUTPUT_FILES ${OUTPUT_FILE})
list(APPEND SYMBOLS ${OUTPUT_SYMBOL})
endforeach()
message(STATUS "Generating embedding library ${EMBED_NAME}")
generate_embed_source(${EMBED_NAME} SRC ${SRC_FILE} HEADER ${HEADER_FILE} OBJECTS ${OUTPUT_FILES} SYMBOLS ${SYMBOLS} RELATIVE ${PARSE_RELATIVE} FILES ${PARSE_UNPARSED_ARGUMENTS})
set(INTERNAL_EMBED_LIB embed_lib_${EMBED_NAME})
add_library(${INTERNAL_EMBED_LIB} OBJECT "${SRC_FILE}")
target_include_directories(${INTERNAL_EMBED_LIB} PRIVATE "${EMBED_DIR}/include")
target_compile_options(${INTERNAL_EMBED_LIB} PRIVATE -Wno-reserved-identifier -Wno-extern-initializer -Wno-missing-variable-declarations)
set_target_properties(${INTERNAL_EMBED_LIB} PROPERTIES POSITION_INDEPENDENT_CODE On)
add_library(${EMBED_NAME} INTERFACE)
if(EMBED_USE_LD)
target_sources(${EMBED_NAME} INTERFACE ${OUTPUT_FILES})
else()
target_sources(${INTERNAL_EMBED_LIB} PRIVATE ${OUTPUT_FILES})
endif()
target_sources(${EMBED_NAME} INTERFACE $<TARGET_OBJECTS:${INTERNAL_EMBED_LIB}>)
target_include_directories(${EMBED_NAME} INTERFACE "${EMBED_DIR}/include")
endfunction()
cmake_minimum_required(VERSION 3.16)
project(composable_kernel_host)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
set(CK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..)
find_package(ROCM)
include(ROCMInstallTargets)
include(ROCMTest)
list(APPEND CMAKE_MODULE_PATH ${CK_ROOT}/cmake)
include(Embed)
file(GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS
${CK_ROOT}/include/ck/*.hpp)
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
message(STATUS "RELATIVE: ${CK_ROOT}/include")
add_embed_library(ck_headers ${KERNEL_FILES} RELATIVE ${CK_ROOT}/include)
add_definitions(-std=c++17)
file(GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp)
# TODO: Use object library
add_library(ck_host STATIC ${SOURCES})
target_link_libraries(ck_host PRIVATE ck_headers)
# TODO: Set -fPIC
set_target_properties(ck_host PROPERTIES LINKER_LANGUAGE CXX)
target_include_directories(ck_host PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
)
add_executable(ck-template-driver driver/main.cpp)
target_link_libraries(ck-template-driver ck_host)
rocm_install(
TARGETS ck_host ck_headers
EXPORT ck_hostTargets
)
rocm_install(DIRECTORY include/ck DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(BUILD_TESTING)
add_subdirectory(test)
endif()
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
#include "ck/host/device_gemm_multiple_d/operation.hpp"
#include "ck/host/stringutils.hpp"
struct Emitters
{
std::unordered_map<std::string, std::function<std::vector<std::string>()>> m;
template <class T>
void Register(const std::string& name)
{
m[name] = [] {
auto ops = T::CreateOperations();
return ck::host::Transform(
ops, [](const auto& op) { return op.ToSolution().ToTemplateString(); });
};
}
std::string Emit(const std::string& name)
{
return "std::tuple<\n" + ck::host::JoinStrings(m.at(name)(), ",\n") + ">";
}
std::vector<std::string> List() const
{
return ck::host::Transform(m, [](auto&& p) { return p.first; });
}
};
int main(int argc, const char* argv[])
{
std::string prog = argv[0];
std::vector<std::string> args(argv + 1, argv + argc);
Emitters e;
e.Register<ck::host::device_gemm_multiple_d::Operation_Xdl_CShuffle>(
"DeviceGemmMultipleD_Xdl_CShuffle");
if(args.empty() or std::any_of(args.begin(), args.end(), [](auto arg) {
return arg == "-h" or arg == "--help";
}))
{
std::cout << "USAGE:" << std::endl;
std::cout << " " << prog << " [TEMPLATE]" << std::endl;
std::cout << std::endl;
std::cout << "FLAGS:" << std::endl;
std::cout << " -h, --help Show help" << std::endl;
std::cout << std::endl;
std::cout << "TEMPLATES:" << std::endl;
for(auto x : e.List())
std::cout << " " << x << std::endl;
std::cout << std::endl;
return 0;
}
for(auto name : args)
std::cout << e.Emit(name) << std::endl;
return 0;
}
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cstdlib>
#include <vector>
#include <memory>
#include <sstream>
#include <iterator>
#include <numeric>
#include "ck/host/common.hpp"
namespace ck {
namespace host {
namespace device_gemm_multiple_d {
struct Problem
{
std::size_t M = 0;
std::size_t N = 0;
std::size_t K = 0;
bool TransA = false;
bool TransB = false;
bool TransE = false;
std::vector<bool> DsTrans = {};
DataType ADataType = DataType::Half;
DataType BDataType = DataType::Half;
DataType EDataType = DataType::Half;
std::vector<DataType> DsDataType = {};
std::string AElementOp = "ck::tensor_operation::element_wise::PassThrough";
std::string BElementOp = "ck::tensor_operation::element_wise::PassThrough";
std::string CDEElementOp = "ck::Tuple<>";
};
} // namespace device_gemm_multiple_d
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cstdlib>
#include <vector>
#include <string>
#include "ck/host/types.hpp"
#include "ck/host/operation/gemm.hpp"
#include "ck/host/device_gemm_multiple_d/problem.hpp"
namespace ck {
namespace host {
namespace device_gemm_multiple_d {
struct Operation_Xdl_CShuffle
{
static std::vector<Operation_Xdl_CShuffle> CreateOperations();
static std::vector<Operation_Xdl_CShuffle> CreateOperations(const Problem& prob);
TensorDesc A{};
TensorDesc B{};
DataType acc = DataType::Half;
DataType cs_type = DataType::Half;
std::vector<TensorDesc> Ds = {};
TensorDesc E{};
std::string a_elem_op = PassThrough;
std::string b_elem_op = PassThrough;
std::string cde_elem_op = Bilinear;
std::string gemm_specialization = "ck::tensor_operation::device::GemmSpecialization::Default";
operation::TileDesc tile_desc{};
operation::BlockTransferDesc a_block_transfer{};
operation::BlockTransferDesc b_block_transfer{};
operation::CShuffleDesc cshuffle{};
operation::CBlockTransferDesc c_block_transfer{};
Solution ToSolution() const;
};
} // namespace device_gemm_multiple_d
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cstdlib>
#include <vector>
#include <string>
#include "ck/host/types.hpp"
namespace ck {
namespace host {
namespace device_gemm_multiple_d {
struct Problem
{
std::size_t M = 0;
std::size_t N = 0;
std::size_t K = 0;
bool TransA = false;
bool TransB = false;
bool TransE = false;
std::vector<bool> DsTrans = {};
DataType ADataType = DataType::Half;
DataType BDataType = DataType::Half;
DataType EDataType = DataType::Half;
std::vector<DataType> DsDataType = {};
std::string AElementOp = PassThrough;
std::string BElementOp = PassThrough;
std::string CDEElementOp = "ck::Tuple<>";
};
} // namespace device_gemm_multiple_d
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <string>
#include <utility>
#include <unordered_map>
#include <vector>
namespace ck {
namespace host {
std::unordered_map<std::string, std::pair<const char*, const char*>> GetHeaders();
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <string>
namespace ck {
namespace host {
namespace operation {
struct TileDesc
{
int block_size = 0;
int m_per_block = 0;
int n_per_block = 0;
int k_per_block = 0;
int ak1 = 0;
int bk1 = 0;
int m_per_XDL = 0;
int n_per_XDL = 0;
int m_Xdl_per_wave = 0;
int n_Xdl_per_wave = 0;
int num_gemmk_prefetch_stage = 0;
};
struct BlockTransferDesc
{
std::string thread_cluster_length = "";
std::string thread_cluster_arrange_order = "";
std::string src_access_order = "";
int src_vec_dim = 0;
int src_scalar_per_vector = 0;
int dst_scalar_per_vector_k1 = 0;
int lds_add_extra_dim = 0;
};
struct CShuffleDesc
{
int m_Xdl_per_wave_per_shuffle = 0;
int n_Xdl_per_wave_per_shuffle = 0;
};
struct CBlockTransferDesc
{
std::string cluster_lengths_m_block_m_wave_m_per_Xdl_n_block_n_wave_n_per_Xdl = "";
int scalar_per_vector_n_wave_n_per_Xdl = 0;
};
} // namespace operation
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <algorithm>
#include <cassert>
#include <numeric>
#include <string>
#include <utility>
#include <unordered_map>
#include <vector>
namespace ck {
namespace host {
template <class F>
std::string trim(const std::string& s, F f)
{
auto start = std::find_if_not(s.begin(), s.end(), f);
auto last = std::find_if_not(s.rbegin(), std::string::const_reverse_iterator(start), f).base();
return {start, last};
}
inline std::string trim(const std::string& s)
{
return trim(s, [](unsigned char c) { return std::isspace(c); });
}
template <class Strings>
inline std::string JoinStrings(Strings strings, const std::string& delim)
{
auto it = strings.begin();
if(it == strings.end())
return "";
auto nit = std::next(it);
return std::accumulate(nit, strings.end(), *it, [&](std::string x, std::string y) {
return std::move(x) + delim + std::move(y);
});
}
template <class F>
inline std::string
InterpolateString(const std::string& input, F f, std::string start = "${", std::string end = "}")
{
std::string result = "";
result.reserve(input.size());
auto it = input.begin();
while(it != input.end())
{
auto next_start = std::search(it, input.end(), start.begin(), start.end());
auto next_end = std::search(next_start, input.end(), end.begin(), end.end());
result.append(it, next_start);
if(next_start == input.end())
break;
if(next_end == input.end())
{
throw std::runtime_error("Unbalanced brackets");
}
auto r = f(next_start + start.size(), next_end);
result.append(r.begin(), r.end());
it = next_end + end.size();
}
return result;
}
inline std::string InterpolateString(const std::string& input,
const std::unordered_map<std::string, std::string>& vars,
std::string start = "${",
std::string end = "}")
{
return InterpolateString(
input,
[&](auto start_it, auto last_it) {
auto key = trim({start_it, last_it});
auto it = vars.find(key);
if(it == vars.end())
throw std::runtime_error("Unknown key: " + key);
return it->second;
},
std::move(start),
std::move(end));
}
template <class Range, class F>
inline auto Transform(const Range& r, F f) -> std::vector<decltype(f(*r.begin()))>
{
std::vector<decltype(f(*r.begin()))> result;
std::transform(r.begin(), r.end(), std::back_inserter(result), f);
return result;
}
template <class Range1, class Range2, class F>
inline auto Transform(const Range1& r1, const Range2& r2, F f)
-> std::vector<decltype(f(*r1.begin(), *r2.begin()))>
{
std::vector<decltype(f(*r1.begin(), *r2.begin()))> result;
assert(std::distance(r1.begin(), r1.end()) == std::distance(r2.begin(), r2.end()));
std::transform(r1.begin(), r1.end(), r2.begin(), std::back_inserter(result), f);
return result;
}
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <string>
#include <utility>
#include <unordered_map>
#include <vector>
namespace ck {
namespace host {
struct Solution
{
Solution() = default;
Solution(std::string str, std::unordered_map<std::string, std::string> values);
std::string ToTemplateString() const;
std::string ToTemplateParameter(const std::string& name) const;
private:
std::string template_str;
std::unordered_map<std::string, std::string> template_values;
};
enum class DataType
{
Half,
Float,
Int8,
Int32
};
std::string ToString(DataType dt);
enum class Layout
{
Row,
Column
};
std::string ToString(Layout dl);
enum class GemmType
{
Default
};
std::string ToString(GemmType gt);
struct TensorDesc
{
DataType element;
Layout layout;
};
std::string SequenceStr(const std::vector<int>& v);
std::string MakeTuple(const std::vector<std::string>& v);
template <int... xs>
const std::string S = SequenceStr({xs...});
constexpr const char* PassThrough = "ck::tensor_operation::element_wise::PassThrough";
constexpr const char* Bilinear = "ck::tensor_operation::element_wise::Bilinear";
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cstdint>
namespace ck {
namespace host {
std::size_t integer_divide_ceil(std::size_t x, std::size_t y);
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/host/device_gemm_multiple_d/operation.hpp"
#include "ck/host/stringutils.hpp"
#include "ck/host/utils.hpp"
#include <cassert>
namespace ck {
namespace host {
namespace device_gemm_multiple_d {
static std::string GetGemmSpec(const std::size_t m,
const std::size_t n,
const std::size_t k,
const std::size_t m_per_block,
const std::size_t n_per_block,
const std::size_t k_per_block)
{
std::string spec = "";
if(integer_divide_ceil(m, m_per_block) * m_per_block - m != 0)
spec += "M";
if(integer_divide_ceil(n, n_per_block) * n_per_block - n != 0)
spec += "N";
if(integer_divide_ceil(k, k_per_block) * k_per_block - k != 0)
spec += "K";
if(spec == "")
return "ck::tensor_operation::device::GemmSpecialization::Default";
return "ck::tensor_operation::device::GemmSpecialization::" + spec + "Padding";
}
template <class F>
std::vector<Operation_Xdl_CShuffle> CreateOperationsImpl(F f)
{
std::vector<Operation_Xdl_CShuffle> result;
// Tile Desc: (block_size, m_per_block, n_per_block, k_per_block, ak1, bk1,
// m_per_XDL, n_per_XDL, m_Xdl_per_wave, n_Xdl_per_wave, num_gemmk_prefetch_stage)
std::vector<operation::TileDesc> tile_descriptions = {
{256, 256, 128, 32, 8, 8, 32, 32, 4, 2, 1},
{256, 128, 256, 32, 8, 8, 32, 32, 2, 4, 1},
{128, 128, 128, 32, 8, 8, 32, 32, 4, 2, 1},
{256, 128, 128, 32, 8, 8, 32, 32, 2, 2, 1},
{128, 128, 64, 32, 8, 8, 32, 32, 2, 2, 1},
{128, 64, 128, 32, 8, 8, 32, 32, 2, 2, 1},
{64, 64, 64, 32, 8, 8, 32, 32, 2, 2, 1},
{256, 128, 64, 32, 8, 8, 32, 32, 2, 1, 1},
{256, 64, 128, 32, 8, 8, 32, 32, 1, 2, 1},
{128, 128, 32, 32, 8, 8, 32, 32, 2, 1, 1},
{128, 32, 128, 32, 8, 8, 32, 32, 1, 2, 1},
{64, 64, 32, 32, 8, 8, 32, 32, 2, 1, 1},
{64, 32, 64, 32, 8, 8, 32, 32, 1, 2, 1},
};
// BlockTransferDesc: (thread_cluster_length, thread_cluster_arrange_order, src_access_order,
// src_vec_dim, src_scalar_per_vector, dst_scalar_per_vector_k1, lds_add_extra_dim )
std::vector<operation::BlockTransferDesc> a_block_descriptions = {
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
};
std::vector<operation::BlockTransferDesc> b_block_descriptions = {
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
{S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, 1},
};
// cshuffle_descriptions: (m_Xdl_per_wave_per_shuffle, n_Xdl_per_wave_per_shuffle)
std::vector<operation::CShuffleDesc> cshuffle_descriptions = {
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
{1, 1},
};
// CBlockTransferDesc: (cluster_lengths_m_block_m_wave_m_per_Xdl_n_block_n_wave_n_per_Xdl,
// scalar_per_vector_n_wave_n_per_Xdl)
std::vector<operation::CBlockTransferDesc> c_block_descriptions = {
{S<1, 32, 1, 8>, 8},
{S<1, 32, 1, 8>, 8},
{S<1, 16, 1, 8>, 8},
{S<1, 32, 1, 8>, 8},
{S<1, 32, 1, 4>, 8},
{S<1, 16, 1, 8>, 8},
{S<1, 16, 1, 4>, 8},
{S<1, 32, 1, 8>, 8},
{S<1, 32, 1, 8>, 8},
{S<1, 32, 1, 4>, 8},
{S<1, 16, 1, 8>, 8},
{S<1, 16, 1, 4>, 8},
{S<1, 16, 1, 4>, 8},
};
assert(tile_descriptions.size() == a_block_descriptions.size());
assert(tile_descriptions.size() == b_block_descriptions.size());
assert(tile_descriptions.size() == cshuffle_descriptions.size());
assert(tile_descriptions.size() == c_block_descriptions.size());
for(std::size_t i = 0; i < tile_descriptions.size(); i++)
{
Operation_Xdl_CShuffle x;
x.tile_desc = tile_descriptions[i];
x.a_block_transfer = a_block_descriptions[i];
x.b_block_transfer = b_block_descriptions[i];
x.cshuffle = cshuffle_descriptions[i];
x.c_block_transfer = c_block_descriptions[i];
auto all = f(x);
result.insert(result.end(), all.begin(), all.end());
}
return result;
}
static Layout ToLayout(bool Trans) { return Trans ? Layout::Column : Layout::Row; }
std::vector<Operation_Xdl_CShuffle> Operation_Xdl_CShuffle::CreateOperations()
{
return CreateOperationsImpl([](auto x) -> std::vector<Operation_Xdl_CShuffle> { return {x}; });
}
std::vector<Operation_Xdl_CShuffle> Operation_Xdl_CShuffle::CreateOperations(const Problem& prob)
{
return CreateOperationsImpl(
[&](Operation_Xdl_CShuffle x) -> std::array<Operation_Xdl_CShuffle, 1> {
x.A = TensorDesc{prob.ADataType, ToLayout(prob.TransA)};
x.B = TensorDesc{prob.BDataType, ToLayout(prob.TransB)};
x.E = TensorDesc{prob.EDataType, ToLayout(prob.TransE)};
x.Ds = Transform(prob.DsTrans, prob.DsDataType, [](auto trans, auto dt) {
return TensorDesc{dt, ToLayout(trans)};
});
x.a_elem_op = prob.AElementOp;
x.b_elem_op = prob.BElementOp;
x.cde_elem_op = prob.CDEElementOp;
x.gemm_specialization = GetGemmSpec(prob.M,
prob.N,
prob.K,
x.tile_desc.m_per_block,
x.tile_desc.n_per_block,
x.tile_desc.k_per_block);
return {x};
});
}
static const char* const DeviceGemmMultipleD_Xdl_CShuffleTemplate =
"ck::tensor_operation::device::DeviceGemmMultipleD_Xdl_CShuffle<${LayoutA}, ${LayoutB}, "
"${LayoutDs}, ${LayoutE}, ${ADataType}, ${BDataType}, ${AccDataType}, ${CShuffleDataType}, "
"${DsDataType}, ${EDataType}, ${AElementwiseOperation}, ${BElementwiseOperation}, "
"${CDEElementwiseOperation}, ${GemmSpecialization}, ${NumGemmkPrefetchStage}, ${BlockSize}, "
"${MPerBlock}, ${NPerBlock}, ${KPerBlock}, ${AK1}, ${BK1}, ${MPerXDL}, ${NPerXDL}, "
"${MXdlPerWave}, ${NXdlPerWave}, ${ABlockTransferThreadClusterLengths_AK0_M_AK1}, "
"${ABlockTransferThreadClusterArrangeOrder}, ${ABlockTransferSrcAccessOrder}, "
"${ABlockTransferSrcVectorDim}, ${ABlockTransferSrcScalarPerVector}, "
"${ABlockTransferDstScalarPerVector_AK1}, ${ABlockLdsExtraM}, "
"${BBlockTransferThreadClusterLengths_BK0_N_BK1}, ${BBlockTransferThreadClusterArrangeOrder}, "
"${BBlockTransferSrcAccessOrder}, ${BBlockTransferSrcVectorDim}, "
"${BBlockTransferSrcScalarPerVector}, ${BBlockTransferDstScalarPerVector_BK1}, "
"${BBlockLdsExtraN}, ${CShuffleMXdlPerWavePerShuffle}, ${CShuffleNXdlPerWavePerShuffle}, "
"${CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock}, "
"${CDEBlockTransferScalarPerVector_NPerBlock}>";
Solution Operation_Xdl_CShuffle::ToSolution() const
{
std::unordered_map<std::string, std::string> values = {
{"LayoutA", ToString(this->A.layout)},
{"LayoutB", ToString(this->B.layout)},
{"LayoutDs",
MakeTuple(Transform(this->Ds, [](auto tensor) { return ToString(tensor.layout); }))},
{"LayoutE", ToString(this->E.layout)},
{"ADataType", ToString(this->A.element)},
{"BDataType", ToString(this->B.element)},
{"AccDataType", ToString(this->acc)},
{"CShuffleDataType", ToString(this->cs_type)},
{"DsDataType",
MakeTuple(Transform(this->Ds, [](auto tensor) { return ToString(tensor.element); }))},
{"EDataType", ToString(this->E.element)},
{"AElementwiseOperation", this->a_elem_op},
{"BElementwiseOperation", this->b_elem_op},
{"CDEElementwiseOperation", this->cde_elem_op},
{"GemmSpecialization", this->gemm_specialization},
{"NumGemmkPrefetchStage", std::to_string(this->tile_desc.num_gemmk_prefetch_stage)},
{"BlockSize", std::to_string(this->tile_desc.block_size)},
{"MPerBlock", std::to_string(this->tile_desc.m_per_block)},
{"NPerBlock", std::to_string(this->tile_desc.n_per_block)},
{"KPerBlock", std::to_string(this->tile_desc.k_per_block)},
{"AK1", std::to_string(this->tile_desc.ak1)},
{"BK1", std::to_string(this->tile_desc.bk1)},
{"MPerXDL", std::to_string(this->tile_desc.m_per_XDL)},
{"NPerXDL", std::to_string(this->tile_desc.n_per_XDL)},
{"MXdlPerWave", std::to_string(this->tile_desc.m_Xdl_per_wave)},
{"NXdlPerWave", std::to_string(this->tile_desc.n_Xdl_per_wave)},
{"ABlockTransferThreadClusterLengths_AK0_M_AK1",
this->a_block_transfer.thread_cluster_length},
{"ABlockTransferThreadClusterArrangeOrder",
this->a_block_transfer.thread_cluster_arrange_order},
{"ABlockTransferSrcAccessOrder", this->a_block_transfer.src_access_order},
{"ABlockTransferSrcVectorDim", std::to_string(this->a_block_transfer.src_vec_dim)},
{"ABlockTransferSrcScalarPerVector",
std::to_string(this->a_block_transfer.src_scalar_per_vector)},
{"ABlockTransferDstScalarPerVector_AK1",
std::to_string(this->a_block_transfer.dst_scalar_per_vector_k1)},
{"ABlockLdsExtraM", std::to_string(this->a_block_transfer.lds_add_extra_dim)},
{"BBlockTransferThreadClusterLengths_BK0_N_BK1",
this->b_block_transfer.thread_cluster_length},
{"BBlockTransferThreadClusterArrangeOrder",
this->b_block_transfer.thread_cluster_arrange_order},
{"BBlockTransferSrcAccessOrder", this->b_block_transfer.src_access_order},
{"BBlockTransferSrcVectorDim", std::to_string(this->b_block_transfer.src_vec_dim)},
{"BBlockTransferSrcScalarPerVector",
std::to_string(this->b_block_transfer.src_scalar_per_vector)},
{"BBlockTransferDstScalarPerVector_BK1",
std::to_string(this->b_block_transfer.dst_scalar_per_vector_k1)},
{"BBlockLdsExtraN", std::to_string(this->b_block_transfer.lds_add_extra_dim)},
{"CShuffleMXdlPerWavePerShuffle",
std::to_string(this->cshuffle.m_Xdl_per_wave_per_shuffle)},
{"CShuffleNXdlPerWavePerShuffle",
std::to_string(this->cshuffle.n_Xdl_per_wave_per_shuffle)},
{"CDEBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock",
this->c_block_transfer.cluster_lengths_m_block_m_wave_m_per_Xdl_n_block_n_wave_n_per_Xdl},
{"CDEBlockTransferScalarPerVector_NPerBlock",
std::to_string(this->c_block_transfer.scalar_per_vector_n_wave_n_per_Xdl)},
};
return Solution{InterpolateString(DeviceGemmMultipleD_Xdl_CShuffleTemplate, values),
std::move(values)};
}
} // namespace device_gemm_multiple_d
} // namespace host
} // namespace ck
#include "ck/host/headers.hpp"
#include "ck_headers.hpp"
namespace ck {
namespace host {
const std::string config_header = "";
std::unordered_map<std::string, std::pair<const char*, const char*>> GetHeaders()
{
auto headers = ck_headers();
headers.insert(std::make_pair(
"ck/config.h",
std::make_pair(config_header.data(), config_header.data() + config_header.size())));
return headers;
}
} // namespace host
} // namespace ck
\ No newline at end of file
#include "ck/host/types.hpp"
#include "ck/host/stringutils.hpp"
#include <algorithm>
#include <stdexcept>
namespace ck {
namespace host {
Solution::Solution(std::string str, std::unordered_map<std::string, std::string> values)
: template_str(std::move(str)), template_values(std::move(values))
{
}
std::string Solution::ToTemplateString() const { return this->template_str; }
std::string Solution::ToTemplateParameter(const std::string& name) const
{
return this->template_values.at(name);
}
std::string ToString(DataType dt)
{
switch(dt)
{
case DataType::Float: return "float";
case DataType::Half: return "ck::half_t";
case DataType::Int8: return "int8_t";
case DataType::Int32: return "int32_t";
}
throw std::runtime_error("Incorrect data type");
}
std::string ToString(Layout dl)
{
switch(dl)
{
case Layout::Row: return "ck::tensor_layout::gemm::RowMajor";
case Layout::Column: return "ck::tensor_layout::gemm::ColumnMajor";
}
throw std::runtime_error("Incorrect layout");
}
std::string ToString(GemmType gt)
{
switch(gt)
{
case GemmType::Default: return "ck::tensor_operation::device::GemmSpecialization::Default";
}
throw std::runtime_error("Incorrect gemm type");
}
std::string SequenceStr(const std::vector<int>& v)
{
return "ck::Sequence<" +
JoinStrings(Transform(v, [](int x) { return std::to_string(x); }), ", ") + ">";
}
std::string MakeTuple(const std::vector<std::string>& v)
{
return "ck::Tuple<" + JoinStrings(v, ", ") + ">";
}
} // namespace host
} // namespace ck
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
#include "ck/host/utils.hpp"
namespace ck {
namespace host {
std::size_t integer_divide_ceil(std::size_t x, std::size_t y)
{
return (x + y - std::size_t{1}) / y;
}
} // namespace host
} // namespace ck
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
add_subdirectory(rtc)
file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp)
foreach(TEST_SRC ${TEST_SRCS})
get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE)
rocm_add_test_executable(test_host_${BASE_NAME} ${TEST_SRC})
target_link_libraries(test_host_${BASE_NAME} ck_rtc ck_host)
target_include_directories(test_host_${BASE_NAME} PUBLIC include())
endforeach()
#include "ck/host/device_gemm_multiple_d/problem.hpp"
#include "ck/host/device_gemm_multiple_d/operation.hpp"
#include "ck/host/headers.hpp"
#include "ck/host/stringutils.hpp"
#include <algorithm>
#include <iterator>
#include <test.hpp>
#include <rtc/compile_kernel.hpp>
const std::string compile_check = R"__ck__(
#include <${include}>
extern "C" __global__ void f() {
using type = ${template}::DeviceOp;
}
)__ck__";
std::vector<rtc::src_file> get_headers_for_test()
{
std::vector<rtc::src_file> result;
auto hs = ck::host::GetHeaders();
std::transform(
hs.begin(), hs.end(), std::back_inserter(result), [&](const auto& p) -> rtc::src_file {
auto s = p.second;
std::string content{s.first, s.second};
return {p.first, content};
});
return result;
}
TEST_CASE(test_operation)
{
ck::host::device_gemm_multiple_d::Problem prob;
prob.M = 256;
prob.N = 256;
prob.K = 256;
auto ops = ck::host::device_gemm_multiple_d::Operation_Xdl_CShuffle::CreateOperations(prob);
for(auto op : ops)
{
auto solution = op.ToSolution();
std::string include =
"ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp";
auto src = ck::host::InterpolateString(
compile_check, {{"include", include}, {"template", solution.ToTemplateString()}});
auto srcs = get_headers_for_test();
srcs.push_back({"main.cpp", src});
rtc::compile_options options;
options.kernel_name = "f";
rtc::compile_kernel(srcs, options);
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <atomic>
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <sstream>
#include <type_traits>
#include <unordered_map>
#include <vector>
#ifdef __linux__
#include <unistd.h>
#endif
#ifndef MIGRAPHX_GUARD_TEST_TEST_HPP
#define MIGRAPHX_GUARD_TEST_TEST_HPP
namespace test {
// clang-format off
// NOLINTNEXTLINE
#define TEST_FOREACH_BINARY_OPERATORS(m) \
m(==, equal) \
m(!=, not_equal) \
m(<=, less_than_equal) \
m(>=, greater_than_equal) \
m(<, less_than) \
m(>, greater_than) \
m(and, and_op) \
m(or, or_op)
// clang-format on
// clang-format off
// NOLINTNEXTLINE
#define TEST_FOREACH_UNARY_OPERATORS(m) \
m(not, not_op)
// clang-format on
// NOLINTNEXTLINE
#define TEST_EACH_BINARY_OPERATOR_OBJECT(op, name) \
struct name \
{ \
static std::string as_string() { return #op; } \
template <class T, class U> \
static decltype(auto) call(T&& x, U&& y) \
{ \
return x op y; \
} \
};
// NOLINTNEXTLINE
#define TEST_EACH_UNARY_OPERATOR_OBJECT(op, name) \
struct name \
{ \
static std::string as_string() { return #op; } \
template <class T> \
static decltype(auto) call(T&& x) \
{ \
return op x; \
} \
};
TEST_FOREACH_BINARY_OPERATORS(TEST_EACH_BINARY_OPERATOR_OBJECT)
TEST_FOREACH_UNARY_OPERATORS(TEST_EACH_UNARY_OPERATOR_OBJECT)
struct nop
{
static std::string as_string() { return ""; }
template <class T>
static auto call(T&& x)
{
return static_cast<T&&>(x);
}
};
struct function
{
static std::string as_string() { return ""; }
template <class T>
static decltype(auto) call(T&& x)
{
return x();
}
};
template <class Stream, class Iterator>
Stream& stream_range(Stream& s, Iterator start, Iterator last);
template <class Stream>
inline Stream& operator<<(Stream& s, std::nullptr_t)
{
s << "nullptr";
return s;
}
template <class Stream,
class Range,
class = typename std::enable_if<not std::is_convertible<Range, std::string>{}>::type>
inline auto operator<<(Stream& s, const Range& v) -> decltype(stream_range(s, v.begin(), v.end()))
{
s << "{ ";
stream_range(s, v.begin(), v.end());
s << "}";
return s;
}
template <class Stream, class Iterator>
inline Stream& stream_range(Stream& s, Iterator start, Iterator last)
{
if(start != last)
{
s << *start;
std::for_each(std::next(start), last, [&](auto&& x) { s << ", " << x; });
}
return s;
}
template <class T>
const T& get_value(const T& x)
{
return x;
}
template <class T, class Operator = nop>
struct lhs_expression;
template <class T>
lhs_expression<T> make_lhs_expression(T&& lhs);
template <class T, class Operator>
lhs_expression<T, Operator> make_lhs_expression(T&& lhs, Operator);
// NOLINTNEXTLINE
#define TEST_EXPR_BINARY_OPERATOR(op, name) \
template <class V> \
auto operator op(const V& rhs2) const \
{ \
return make_expression(*this, rhs2, name{}); /* NOLINT */ \
}
// NOLINTNEXTLINE
#define TEST_EXPR_UNARY_OPERATOR(op, name) \
auto operator op() const { return make_lhs_expression(lhs, name{}); /* NOLINT */ }
template <class T, class U, class Operator>
struct expression
{
T lhs;
U rhs;
friend std::ostream& operator<<(std::ostream& s, const expression& self)
{
s << self.lhs << " " << Operator::as_string() << " " << self.rhs;
return s;
}
friend decltype(auto) get_value(const expression& e) { return e.value(); }
decltype(auto) value() const { return Operator::call(get_value(lhs), get_value(rhs)); };
TEST_FOREACH_UNARY_OPERATORS(TEST_EXPR_UNARY_OPERATOR)
TEST_FOREACH_BINARY_OPERATORS(TEST_EXPR_BINARY_OPERATOR)
};
// TODO: Remove rvalue references
template <class T, class U, class Operator>
expression<T, U, Operator> make_expression(T&& rhs, U&& lhs, Operator)
{
return {std::forward<T>(rhs), std::forward<U>(lhs)};
}
// TODO: Remove rvalue reference
template <class T>
lhs_expression<T> make_lhs_expression(T&& lhs)
{
return lhs_expression<T>{std::forward<T>(lhs)};
}
template <class T, class Operator>
lhs_expression<T, Operator> make_lhs_expression(T&& lhs, Operator)
{
return lhs_expression<T, Operator>{std::forward<T>(lhs)};
}
template <class T, class Operator>
struct lhs_expression
{
T lhs;
explicit lhs_expression(T e) : lhs(e) {}
friend std::ostream& operator<<(std::ostream& s, const lhs_expression& self)
{
std::string op = Operator::as_string();
if(not op.empty())
s << Operator::as_string() << " ";
s << self.lhs;
return s;
}
friend decltype(auto) get_value(const lhs_expression& e) { return e.value(); }
decltype(auto) value() const { return Operator::call(get_value(lhs)); }
TEST_FOREACH_BINARY_OPERATORS(TEST_EXPR_BINARY_OPERATOR)
TEST_FOREACH_UNARY_OPERATORS(TEST_EXPR_UNARY_OPERATOR)
// NOLINTNEXTLINE
#define TEST_LHS_REOPERATOR(op) \
template <class U> \
auto operator op(const U& rhs) const \
{ \
return make_lhs_expression(lhs op rhs); \
}
TEST_LHS_REOPERATOR(+)
TEST_LHS_REOPERATOR(-)
TEST_LHS_REOPERATOR(*)
TEST_LHS_REOPERATOR(/)
TEST_LHS_REOPERATOR(%)
TEST_LHS_REOPERATOR(&)
TEST_LHS_REOPERATOR(|)
TEST_LHS_REOPERATOR(^)
};
template <class F>
struct predicate
{
std::string msg;
F f;
friend std::ostream& operator<<(std::ostream& s, const predicate& self)
{
s << self.msg;
return s;
}
decltype(auto) operator()() const { return f(); }
operator decltype(auto)() const { return f(); }
};
template <class F>
auto make_predicate(const std::string& msg, F f)
{
return make_lhs_expression(predicate<F>{msg, f}, function{});
}
inline std::string as_string(bool x)
{
if(x)
return "true";
return "false";
}
template <class T>
std::string as_string(const T& x)
{
std::stringstream ss;
ss << x;
return ss.str();
}
template <class Iterator>
std::string as_string(Iterator start, Iterator last)
{
std::stringstream ss;
stream_range(ss, start, last);
return ss.str();
}
template <class F>
auto make_function(const std::string& name, F f)
{
return [=](auto&&... xs) {
std::vector<std::string> args = {as_string(xs)...};
return make_predicate(name + "(" + as_string(args.begin(), args.end()) + ")",
[=] { return f(xs...); });
};
}
struct capture
{
template <class T>
auto operator->*(const T& x) const
{
return make_lhs_expression(x);
}
template <class T, class Operator>
auto operator->*(const lhs_expression<T, Operator>& x) const
{
return x;
}
};
enum class color
{
reset = 0,
bold = 1,
underlined = 4,
fg_red = 31,
fg_green = 32,
fg_yellow = 33,
fg_blue = 34,
fg_default = 39,
bg_red = 41,
bg_green = 42,
bg_yellow = 43,
bg_blue = 44,
bg_default = 49
};
inline std::ostream& operator<<(std::ostream& os, const color& c)
{
#ifndef _WIN32
static const bool use_color = isatty(STDOUT_FILENO) != 0;
if(use_color)
return os << "\033[" << static_cast<std::size_t>(c) << "m";
#endif
return os;
}
inline std::atomic<int>& failures()
{
// NOLINTNEXTLINE
static std::atomic<int> f = 0;
return f;
}
template <class T, class F>
void failed(T x, const char* msg, const char* func, const char* file, int line, F f)
{
if(not bool(x.value()))
{
failures()++;
std::cout << func << std::endl;
std::cout << file << ":" << line << ":" << std::endl;
std::cout << color::bold << color::fg_red << " FAILED: " << color::reset << msg << " "
<< "[ " << x << " ]" << std::endl;
f();
}
}
template <class F>
bool throws(F f)
{
try
{
f();
return false;
}
catch(...)
{
return true;
}
}
template <class Exception, class F>
bool throws(F f, const std::string& msg = "")
{
try
{
f();
return false;
}
catch(const Exception& ex)
{
return std::string(ex.what()).find(msg) != std::string::npos;
}
}
template <class T, class U>
auto within_abs(T px, U py, double ptol = 1e-6f)
{
return make_function("near", [](auto x, auto y, auto tol) { return std::abs(x - y) < tol; })(
px, py, ptol);
}
using string_map = std::unordered_map<std::string, std::vector<std::string>>;
template <class Keyword>
string_map generic_parse(std::vector<std::string> as, Keyword keyword)
{
string_map result;
std::string flag;
for(auto&& x : as)
{
auto f = keyword(x);
if(f.empty())
{
result[flag].push_back(x);
}
else
{
flag = f.front();
result[flag]; // Ensure the flag exists
flag = f.back();
}
}
return result;
}
using test_case = std::function<void()>;
inline auto& get_test_cases()
{
// NOLINTNEXTLINE
static std::vector<std::pair<std::string, test_case>> cases;
return cases;
}
inline void add_test_case(std::string name, test_case f)
{
get_test_cases().emplace_back(std::move(name), std::move(f));
}
struct auto_register_test_case
{
template <class F>
auto_register_test_case(const char* name, F f) noexcept
{
add_test_case(name, f);
}
};
struct failure_error
{
};
[[noreturn]] inline void fail() { throw failure_error{}; }
struct driver
{
driver()
{
add_flag({"--help", "-h"}, "Show help");
add_flag({"--list", "-l"}, "List all test cases");
add_flag({"--continue", "-c"}, "Continue after failure");
add_flag({"--quiet", "-q"}, "Don't print out extra output");
}
struct argument
{
std::vector<std::string> flags = {};
std::string help = "";
int nargs = 1;
};
void add_arg(const std::vector<std::string>& flags, const std::string& help = "")
{
arguments.push_back(argument{flags, help, 1});
}
void add_flag(const std::vector<std::string>& flags, const std::string& help = "")
{
arguments.push_back(argument{flags, help, 0});
}
void show_help(const std::string& exe) const
{
std::cout << std::endl;
std::cout << color::fg_yellow << "USAGE:" << color::reset << std::endl;
std::cout << " ";
std::cout << exe << " <test-case>... <options>" << std::endl;
std::cout << std::endl;
std::cout << color::fg_yellow << "ARGS:" << color::reset << std::endl;
std::cout << " ";
std::cout << color::fg_green << "<test-case>..." << color::reset;
std::cout << std::endl;
std::cout << " "
<< "Test case name to run" << std::endl;
std::cout << std::endl;
std::cout << color::fg_yellow << "OPTIONS:" << color::reset << std::endl;
for(auto&& arg : arguments)
{
std::string prefix = " ";
std::cout << color::fg_green;
for(const std::string& a : arg.flags)
{
std::cout << prefix;
std::cout << a;
prefix = ", ";
}
std::cout << color::reset << std::endl;
std::cout << " " << arg.help << std::endl;
}
}
std::ostream& out() const
{
struct null_buffer : std::streambuf
{
virtual int overflow(int c) override { return c; }
};
static null_buffer buffer;
static std::ostream null_stream(&buffer);
if(quiet)
return null_stream;
return std::cout;
}
string_map parse(int argc, const char* argv[]) const
{
std::vector<std::string> args(argv + 1, argv + argc);
string_map keys;
for(auto&& arg : arguments)
{
for(auto&& flag : arg.flags)
{
keys[flag] = {arg.flags.front()};
if(arg.nargs == 0)
keys[flag].push_back("");
}
}
auto result = generic_parse(args, [&](auto&& s) -> std::vector<std::string> {
if(keys.count(s) > 0)
return keys[s];
else
return {};
});
result["__exe__"].push_back(argv[0]);
return result;
}
static std::string create_command(const string_map& args)
{
std::stringstream ss;
ss << args.at("__exe__").front();
if(args.count("") > 0)
{
for(auto&& arg : args.at(""))
ss << " \"" << arg << "\"";
}
for(auto&& p : args)
{
if(p.first == "__exe__")
continue;
if(p.first.empty())
continue;
ss << " " << p.first;
for(auto&& arg : p.second)
ss << " \"" << arg << "\"";
}
return ss.str();
}
static std::string fork(const std::string& name, string_map args)
{
std::string msg;
args[""] = {name};
args.erase("--continue");
args["--quiet"];
auto cmd = create_command(args);
auto r = std::system(cmd.c_str()); // NOLINT
if(r != 0)
msg = "Exited with " + std::to_string(r);
return msg;
}
void run_test_case(const std::string& name, const test_case& f, const string_map& args)
{
ran++;
out() << color::fg_green << "[ RUN ] " << color::reset << color::bold << name
<< color::reset << std::endl;
std::string msg;
if(args.count("--continue") > 0)
{
msg = fork(name, args);
}
else
{
try
{
failures() = 0;
f();
}
// cppcheck-suppress EmptyCatchStatement
catch(const failure_error&)
{
}
}
if(msg.empty() and failures() != 0)
{
if(failures() == 1)
msg = "Test failure";
else
msg = std::to_string(failures()) + " test failures";
}
if(msg.empty())
{
out() << color::fg_green << "[ COMPLETE ] " << color::reset << color::bold << name
<< color::reset << std::endl;
}
else
{
failed.push_back(name);
out() << color::fg_red << "[ FAILED ] " << color::reset << color::bold << name
<< color::reset << ": " << color::fg_yellow << msg << color::reset << std::endl;
}
}
void run(int argc, const char* argv[])
{
auto args = parse(argc, argv);
if(args.count("--help") > 0)
{
show_help(args.at("__exe__").front());
return;
}
if(args.count("--list") > 0)
{
for(auto&& tc : get_test_cases())
out() << tc.first << std::endl;
return;
}
if(args.count("--quiet") > 0)
quiet = true;
auto cases = args[""];
if(cases.empty())
{
for(auto&& tc : get_test_cases())
run_test_case(tc.first, tc.second, args);
}
else
{
std::unordered_map<std::string, test_case> m(get_test_cases().begin(),
get_test_cases().end());
for(auto&& iname : cases)
{
for(auto&& name : get_case_names(iname))
{
auto f = m.find(name);
if(f == m.end())
{
out() << color::fg_red << "[ ERROR ] Test case '" << name
<< "' not found." << color::reset << std::endl;
failed.push_back(name);
}
else
run_test_case(name, f->second, args);
}
}
}
out() << color::fg_green << "[==========] " << color::fg_yellow << ran << " tests ran"
<< color::reset << std::endl;
if(not failed.empty())
{
out() << color::fg_red << "[ FAILED ] " << color::fg_yellow << failed.size()
<< " tests failed" << color::reset << std::endl;
for(auto&& name : failed)
out() << color::fg_red << "[ FAILED ] " << color::fg_yellow << name
<< color::reset << std::endl;
std::exit(1);
}
}
std::function<std::vector<std::string>(const std::string&)> get_case_names =
[](const std::string& name) -> std::vector<std::string> { return {name}; };
std::vector<argument> arguments = {};
std::vector<std::string> failed = {};
std::size_t ran = 0;
bool quiet = false;
};
inline void run(int argc, const char* argv[])
{
driver d{};
d.run(argc, argv);
}
} // namespace test
// NOLINTNEXTLINE
#define TEST_CAPTURE(...) test::capture{}->*__VA_ARGS__
// NOLINTNEXTLINE
#define CHECK(...) \
test::failed( \
TEST_CAPTURE(__VA_ARGS__), #__VA_ARGS__, __PRETTY_FUNCTION__, __FILE__, __LINE__, [] {})
// NOLINTNEXTLINE
#define EXPECT(...) \
test::failed(TEST_CAPTURE(__VA_ARGS__), \
#__VA_ARGS__, \
__PRETTY_FUNCTION__, \
__FILE__, \
__LINE__, \
&test::fail)
// NOLINTNEXTLINE
#define STATUS(...) EXPECT((__VA_ARGS__) == 0)
// NOLINTNEXTLINE
#define TEST_CAT(x, ...) TEST_PRIMITIVE_CAT(x, __VA_ARGS__)
// NOLINTNEXTLINE
#define TEST_PRIMITIVE_CAT(x, ...) x##__VA_ARGS__
// NOLINTNEXTLINE
#define TEST_CASE_REGISTER(...) \
static test::auto_register_test_case TEST_CAT(register_test_case_, __LINE__) = \
test::auto_register_test_case(#__VA_ARGS__, &__VA_ARGS__);
// NOLINTNEXTLINE
#define TEST_CASE(...) \
void __VA_ARGS__(); \
TEST_CASE_REGISTER(__VA_ARGS__) \
void __VA_ARGS__()
#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wglobal-constructors"
#endif
#endif
find_package(hip)
file(GLOB RTC_SOURCES CONFIGURE_DEPENDS src/*.cpp)
add_library(ck_rtc ${RTC_SOURCES})
target_include_directories(ck_rtc PUBLIC include)
target_link_libraries(ck_rtc PUBLIC hip::host)
#ifndef GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL
#define GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL
#include <rtc/kernel.hpp>
#include <filesystem>
#include <string>
namespace rtc {
struct src_file
{
std::filesystem::path path;
std::string content;
};
struct compile_options
{
std::string flags = "";
std::string kernel_name = "main";
};
kernel compile_kernel(const std::vector<src_file>& src,
compile_options options = compile_options{});
} // namespace rtc
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment