Unverified Commit f71af72a authored by Paul Fultz II's avatar Paul Fultz II Committed by GitHub
Browse files

Add hip compilation (#664)



* Add compiler flags

* Add missing include

* Add filesystem header

* Formatting

* Add tmp_dir to run

* Formatting

* Kernel compilation and launching

* Formatting

* Separate pack_args

* Formatting

* Add alignment tests

* Formatting

* Add compile test

* Formatting

* Complete compile test

* Formatting

* Use is_regular_file free function

* Fix is_regular_file call

* Fix tidy issues

* Fix tidy

* Fix tidy issue

* Print size in read_buffer to debug issue on jenkins

* Add hip flags before src file

* Fix reading output files

* Fix unused variable warning

* Formatting

* Formatting

* Disable tidy check
Co-authored-by: default avatarShucai Xiao <shucai.xiao@amd.com>
Co-authored-by: default avatarmvermeulen <5479696+mvermeulen@users.noreply.github.com>
parent 6554639b
...@@ -4,7 +4,7 @@ CheckOptions: ...@@ -4,7 +4,7 @@ CheckOptions:
- key: bugprone-unused-return-value.CheckedFunctions - key: bugprone-unused-return-value.CheckedFunctions
value: '::std::async;::std::launder;::std::remove;::std::remove_if;::std::unique;::std::unique_ptr::release;::std::basic_string::empty;::std::vector::empty;::std::find;::std::find_if;::std::find_if_not;::std::all_of;::std::any_of;::std::none_of;::std::count;::std::count_if;::std::mismatch;::std::find_end;::std::find_first_of;::std::adjacent_find;::std::search;::std::search_n;::std::nth_element;::std::lower_bound;::std::upper_bound;::std::binary_search;::std::equal_range;::std::max;::std::max_element;::std::min;::std::min_element;::std::minmax;::std::minmax_element;::std::equal;::std::lexicographical_compare;::std::accumulate;::std::inner_product' value: '::std::async;::std::launder;::std::remove;::std::remove_if;::std::unique;::std::unique_ptr::release;::std::basic_string::empty;::std::vector::empty;::std::find;::std::find_if;::std::find_if_not;::std::all_of;::std::any_of;::std::none_of;::std::count;::std::count_if;::std::mismatch;::std::find_end;::std::find_first_of;::std::adjacent_find;::std::search;::std::search_n;::std::nth_element;::std::lower_bound;::std::upper_bound;::std::binary_search;::std::equal_range;::std::max;::std::max_element;::std::min;::std::min_element;::std::minmax;::std::minmax_element;::std::equal;::std::lexicographical_compare;::std::accumulate;::std::inner_product'
- key: cppcoreguidelines-macro-usage.AllowedRegexp - key: cppcoreguidelines-macro-usage.AllowedRegexp
value: 'DEBUG|FALLTHROUGH|_THROW|_REQUIRES|_DECLARE_|_VISIT_|_REGISTER_|_GENERATE_|_DETAIL_|_TIDY_|_MANAGE_PTR|_MATCHER|DEVICE_SHARED' value: 'DEBUG|FALLTHROUGH|STRINGIZE|_HAS_|_THROW|_REQUIRES|_DECLARE_|_VISIT_|_REGISTER_|_GENERATE_|_DETAIL_|_TIDY_|_MANAGE_PTR|_MATCHER|DEVICE_SHARED'
- key: cppcoreguidelines-narrowing-conversions.WarnOnFloatingPointNarrowingConversion - key: cppcoreguidelines-narrowing-conversions.WarnOnFloatingPointNarrowingConversion
value: 0 value: 0
- key: modernize-loop-convert.MinConfidence - key: modernize-loop-convert.MinConfidence
......
...@@ -72,6 +72,7 @@ rocm_enable_clang_tidy( ...@@ -72,6 +72,7 @@ rocm_enable_clang_tidy(
performance-* performance-*
readability-* readability-*
-bugprone-signed-char-misuse -bugprone-signed-char-misuse
-bugprone-macro-parentheses
# Disable the aliased reserved identifiers # Disable the aliased reserved identifiers
-cert-dcl37-c -cert-dcl37-c
-cert-dcl51-cpp -cert-dcl51-cpp
......
# Diagnostics that various toolchains print when they do not accept a flag.
# The list is forwarded verbatim to check_cxx_source_compiles by
# check_cxx_linker_flag, so each entry is a FAIL_REGEX argument of that check.
set(check_cxx_linker_flag_patterns
FAIL_REGEX "[Uu]nrecogni[sz]ed .*option" # GNU, NAG
FAIL_REGEX "switch .* is no longer supported" # GNU
FAIL_REGEX "unknown .*option" # Clang
FAIL_REGEX "optimization flag .* not supported" # Clang
FAIL_REGEX "unknown argument ignored" # Clang (cl)
FAIL_REGEX "ignoring unknown option" # MSVC, Intel
FAIL_REGEX "warning D9002" # MSVC, any lang
FAIL_REGEX "option.*not supported" # Intel
FAIL_REGEX "invalid argument .*option" # Intel
FAIL_REGEX "ignoring option .*argument required" # Intel
FAIL_REGEX "ignoring option .*argument is of wrong type" # Intel
FAIL_REGEX "[Uu]nknown option" # HP
FAIL_REGEX "[Ww]arning: [Oo]ption" # SunPro
FAIL_REGEX "command option .* is not recognized" # XL
FAIL_REGEX "command option .* contains an incorrect subargument" # XL
FAIL_REGEX "Option .* is not recognized. Option will be ignored." # XL
FAIL_REGEX "not supported in this configuration. ignored" # AIX
FAIL_REGEX "File with unknown suffix passed to linker" # PGI
FAIL_REGEX "[Uu]nknown switch" # PGI
FAIL_REGEX "WARNING: unknown flag:" # Open64
FAIL_REGEX "Incorrect command line option:" # Borland
FAIL_REGEX "Warning: illegal option" # SunStudio 12
FAIL_REGEX "[Ww]arning: Invalid suboption" # Fujitsu
FAIL_REGEX "An invalid option .* appears on the command line" # Cray
)
# Check whether a trivial C++ program still links when `_flag` is added to the
# link line.
#
#   _flag - linker flag or library option to test (for example -lstdc++fs)
#   _var  - name of the result variable; it is set in the caller's scope and is
#           also the cache entry used by check_cxx_source_compiles
function(check_cxx_linker_flag _flag _var)
    include(CheckCXXSourceCompiles)
    set(_source "int main() { return 0; }")
    # The flag has to reach the link step of the test program; without this the
    # check succeeds for every flag. The change is local to this function scope.
    list(APPEND CMAKE_REQUIRED_LIBRARIES "${_flag}")
    # Cache the result under the caller's variable name so that checks for
    # different flags do not share a single cached answer.
    check_cxx_source_compiles("${_source}" ${_var} ${check_cxx_linker_flag_patterns})
    set(${_var} "${${_var}}" PARENT_SCOPE)
endfunction()
# Locate the ld and objcopy programs that embed_file invokes.
find_program(EMBED_LD ld)
find_program(EMBED_OBJCOPY objcopy)
# Write the C++ header and source that expose the embedded files as a map.
#
#   EMBED_NAME - name of the generated accessor function and of the header
#   SRC        - path of the .cpp file to write
#   HEADER     - path of the .hpp file to write
#   OBJECTS    - object files, one per embedded file (only the file name part
#                is used, as the map key)
#   SYMBOLS    - symbol stems matching OBJECTS one-to-one; the generated code
#                refers to _binary_<stem>_start and _binary_<stem>_end
function(generate_embed_source EMBED_NAME)
    set(options)
    set(oneValueArgs SRC HEADER)
    set(multiValueArgs OBJECTS SYMBOLS)
    cmake_parse_arguments(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
    set(EXTERNS)
    set(INIT_KERNELS)
    list(LENGTH PARSE_SYMBOLS SYMBOLS_LEN)
    list(LENGTH PARSE_OBJECTS OBJECTS_LEN)
    if(NOT ${SYMBOLS_LEN} EQUAL ${OBJECTS_LEN})
        message(FATAL_ERROR "Symbols and objects dont match: ${SYMBOLS_LEN} != ${OBJECTS_LEN}")
    endif()
    # With no files to embed there is no valid index range; emit an empty map.
    if(SYMBOLS_LEN GREATER 0)
        math(EXPR LEN "${SYMBOLS_LEN} - 1")
        foreach(idx RANGE ${LEN})
            list(GET PARSE_SYMBOLS ${idx} SYMBOL)
            list(GET PARSE_OBJECTS ${idx} OBJECT)
            set(START_SYMBOL "_binary_${SYMBOL}_start")
            set(END_SYMBOL "_binary_${SYMBOL}_end")
            string(APPEND EXTERNS "
extern const char ${START_SYMBOL}[];
extern const char ${END_SYMBOL}[];
")
            get_filename_component(BASE_NAME "${OBJECT}" NAME)
            string(APPEND INIT_KERNELS "
{ \"${BASE_NAME}\", { ${START_SYMBOL}, ${END_SYMBOL}} },
")
        endforeach()
    endif()
    # The generated header names std::string and std::pair, so it includes the
    # headers that define them instead of relying on transitive includes.
    file(WRITE "${PARSE_HEADER}" "
#include <string>
#include <unordered_map>
#include <utility>
const std::unordered_map<std::string, std::pair<const char*,const char*>>& ${EMBED_NAME}();
")
    file(WRITE "${PARSE_SRC}" "
#include <${EMBED_NAME}.hpp>
${EXTERNS}
const std::unordered_map<std::string, std::pair<const char*,const char*>>& ${EMBED_NAME}()
{
static const std::unordered_map<std::string, std::pair<const char*,const char*>> result = {${INIT_KERNELS}};
return result;
}
")
endfunction()
# Register a build rule that turns FILE into an object file with ld/objcopy.
#
#   OUTPUT_FILE   - variable name; set in the caller's scope to "${FILE}.o"
#   OUTPUT_SYMBOL - variable name; set in the caller's scope to the C
#                   identifier made from FILE's path relative to the current
#                   binary directory
#   FILE          - file to embed
function(embed_file OUTPUT_FILE OUTPUT_SYMBOL FILE)
set(${OUTPUT_FILE} "${FILE}.o" PARENT_SCOPE)
set(WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
# Glob is used to compute the relative path
get_filename_component(OUTPUT_FILE_DIR "${FILE}" DIRECTORY)
file(MAKE_DIRECTORY "${WORKING_DIRECTORY}/${OUTPUT_FILE_DIR}")
file(GLOB FILES RELATIVE ${WORKING_DIRECTORY} ${FILE})
# NOTE(review): if the glob matches nothing, no rule is added and
# OUTPUT_SYMBOL is left unset in the caller's scope.
foreach(REL_FILE ${FILES})
string(MAKE_C_IDENTIFIER "${REL_FILE}" SYMBOL)
set(${OUTPUT_SYMBOL} ${SYMBOL} PARENT_SCOPE)
add_custom_command(
OUTPUT "${FILE}.o"
# ld wraps the raw file contents in a relocatable object; objcopy then moves
# the data into a read-only section.
COMMAND ${EMBED_LD} -r -o "${FILE}.o" -z noexecstack --format=binary "${REL_FILE}"
COMMAND ${EMBED_OBJCOPY} --rename-section .data=.rodata,alloc,load,readonly,data,contents "${FILE}.o"
WORKING_DIRECTORY ${WORKING_DIRECTORY}
DEPENDS ${FILE}
VERBATIM
)
endforeach()
endfunction()
# Create a static library target EMBED_NAME that embeds every file passed after
# the name and exposes them through the generated <EMBED_NAME>.hpp header.
function(add_embed_library EMBED_NAME)
    set(WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
    set(EMBED_DIR ${CMAKE_CURRENT_BINARY_DIR}/embed/${EMBED_NAME})
    # MAKE_DIRECTORY creates missing parents, so this also creates "embed".
    file(MAKE_DIRECTORY ${EMBED_DIR})
    set(HEADER_FILE "${EMBED_DIR}/include/${EMBED_NAME}.hpp")
    set(SRC_FILE "${EMBED_DIR}/${EMBED_NAME}.cpp")

    # Register one object-file rule per input and remember its symbol stem.
    set(OBJECT_FILES)
    set(SYMBOL_NAMES)
    message(STATUS "Embedding files")
    foreach(EMBEDDED_FILE ${ARGN})
        embed_file(OBJECT_FILE SYMBOL_NAME ${EMBEDDED_FILE})
        list(APPEND OBJECT_FILES ${OBJECT_FILE})
        list(APPEND SYMBOL_NAMES ${SYMBOL_NAME})
    endforeach()

    message(STATUS "Generating embedding library ${EMBED_NAME}")
    generate_embed_source(${EMBED_NAME}
        SRC ${SRC_FILE}
        HEADER ${HEADER_FILE}
        OBJECTS ${OBJECT_FILES}
        SYMBOLS ${SYMBOL_NAMES})

    add_library(${EMBED_NAME} STATIC ${OBJECT_FILES} "${SRC_FILE}")
    target_include_directories(${EMBED_NAME} PUBLIC "${EMBED_DIR}/include")
    set_target_properties(${EMBED_NAME} PROPERTIES POSITION_INDEPENDENT_CODE On)
endfunction()
# Like get_target_property, but stores an empty string in VAR (caller's scope)
# whenever the property value evaluates to false, e.g. when it is not set.
function(get_target_property2 VAR TARGET PROPERTY)
    get_target_property(_value ${TARGET} ${PROPERTY})
    if(NOT _value)
        set(_value "")
    endif()
    set(${VAR} "${_value}" PARENT_SCOPE)
endfunction()
# Append the entries of TARGET's PROPERTY to the string variable FLAGS.
# An entry that names a target is expanded through target_flags; any other
# entry is appended as "<PREFIX><entry>". This is a macro, so _pflags and
# _pflags2 are set in the scope of whoever invokes it.
macro(append_flags FLAGS TARGET PROPERTY PREFIX)
get_target_property2(_pflags ${TARGET} ${PROPERTY})
foreach(FLAG ${_pflags})
if(TARGET ${FLAG})
target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${_pflags2}")
else()
string(APPEND ${FLAGS} " ${PREFIX}${FLAG}")
endif()
endforeach()
endmacro()
# Append the entries of TARGET's PROPERTY to the string variable FLAGS as link
# arguments. Each entry is classified in this order: a target name is expanded
# through target_flags, an entry starting with "-" or naming an existing path
# is appended unchanged, and anything else is appended as "-l<entry>".
macro(append_link_flags FLAGS TARGET PROPERTY)
get_target_property2(_pflags ${TARGET} ${PROPERTY})
foreach(FLAG ${_pflags})
if(TARGET ${FLAG})
target_flags(_pflags2 ${FLAG})
string(APPEND ${FLAGS} " ${_pflags2}")
elseif(FLAG MATCHES "^-.*")
string(APPEND ${FLAGS} " ${FLAG}")
elseif(EXISTS ${FLAG})
string(APPEND ${FLAGS} " ${FLAG}")
else()
string(APPEND ${FLAGS} " -l${FLAG}")
endif()
endforeach()
endmacro()
# Collect the INTERFACE_* usage requirements of TARGET into one space-separated
# command-line string and store it in FLAGS (caller's scope). Targets listed in
# any of those properties are expanded recursively via append_flags and
# append_link_flags.
function(target_flags FLAGS TARGET)
set(_flags)
append_flags(_flags ${TARGET} "INTERFACE_COMPILE_OPTIONS" "")
append_flags(_flags ${TARGET} "INTERFACE_COMPILE_DEFINITIONS" "-D")
append_flags(_flags ${TARGET} "INTERFACE_INCLUDE_DIRECTORIES" "-isystem ")
append_flags(_flags ${TARGET} "INTERFACE_LINK_DIRECTORIES" "-L ")
append_flags(_flags ${TARGET} "INTERFACE_LINK_OPTIONS" "")
# NOTE(review): append_link_flags declares three parameters; the trailing ""
# is an extra argument that the macro does not use.
append_link_flags(_flags ${TARGET} "INTERFACE_LINK_LIBRARIES" "")
# message("_flags: ${_flags}")
set(${FLAGS} ${_flags} PARENT_SCOPE)
endfunction()
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
include(ROCMInstallTargets) include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers) include(ROCMPackageConfigHelpers)
include(RegisterOp) include(RegisterOp)
include(CheckCXXLinkerFlag)
add_library(migraphx add_library(migraphx
analyze_streams.cpp analyze_streams.cpp
...@@ -15,6 +16,7 @@ add_library(migraphx ...@@ -15,6 +16,7 @@ add_library(migraphx
eliminate_concat.cpp eliminate_concat.cpp
eliminate_identity.cpp eliminate_identity.cpp
eliminate_pad.cpp eliminate_pad.cpp
file_buffer.cpp
rewrite_batchnorm.cpp rewrite_batchnorm.cpp
rewrite_rnn.cpp rewrite_rnn.cpp
rewrite_pooling.cpp rewrite_pooling.cpp
...@@ -36,6 +38,7 @@ add_library(migraphx ...@@ -36,6 +38,7 @@ add_library(migraphx
register_target.cpp register_target.cpp
simplify_algebra.cpp simplify_algebra.cpp
simplify_reshapes.cpp simplify_reshapes.cpp
tmp_dir.cpp
value.cpp value.cpp
verify_args.cpp verify_args.cpp
json.cpp json.cpp
...@@ -146,6 +149,13 @@ rocm_install_targets( ...@@ -146,6 +149,13 @@ rocm_install_targets(
${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include
) )
check_cxx_linker_flag(-lstdc++fs HAS_LIB_STD_FILESYSTEM)
if(HAS_LIB_STD_FILESYSTEM)
target_link_libraries(migraphx PRIVATE -lstdc++fs)
endif()
find_path(HALF_INCLUDE_DIR half.hpp) find_path(HALF_INCLUDE_DIR half.hpp)
# TODO: Fix the incorrect path # TODO: Fix the incorrect path
target_include_directories(migraphx SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>) target_include_directories(migraphx SYSTEM PUBLIC $<BUILD_INTERFACE:${HALF_INCLUDE_DIR}>)
......
#include <migraphx/file_buffer.hpp>
#include <migraphx/errors.hpp>
#include <fstream>
#include <iostream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
// Read the entire file `filename` and return its bytes.
// Throws via MIGRAPHX_THROW when the reported size is below one byte (which
// includes a stream that could not be opened) or when reading fails.
std::vector<char> read_buffer(const std::string& filename)
{
    // Opening positioned at the end lets tellg() report the file size.
    std::ifstream input(filename, std::ios::binary | std::ios::ate);
    const std::streamsize length = input.tellg();
    if(length < 1)
    {
        MIGRAPHX_THROW("Invalid size for: " + filename);
    }
    input.seekg(0, std::ios::beg);
    std::vector<char> contents(length);
    input.read(contents.data(), length);
    if(input.fail())
    {
        MIGRAPHX_THROW("Error reading file: " + filename);
    }
    return contents;
}
// Write `size` bytes starting at `buffer` to the file `filename`, replacing
// any previous content. The stream is opened in binary mode so the bytes are
// stored untranslated, matching read_buffer, which reads in binary mode.
void write_buffer(const std::string& filename, const char* buffer, std::size_t size)
{
    std::ofstream os(filename, std::ios::binary);
    os.write(buffer, static_cast<std::streamsize>(size));
}
// Convenience overload that writes the whole contents of `buffer`.
void write_buffer(const std::string& filename, const std::vector<char>& buffer)
{
    write_buffer(filename, buffer.data(), buffer.size());
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_FILE_BUFFER_HPP
#define MIGRAPHX_GUARD_RTGLIB_FILE_BUFFER_HPP
#include <migraphx/config.hpp>
#include <string>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
// Returns the full contents of the file `filename`.
std::vector<char> read_buffer(const std::string& filename);
// Writes `size` bytes starting at `buffer` to the file `filename`.
void write_buffer(const std::string& filename, const char* buffer, std::size_t size);
// Writes every byte of `buffer` to the file `filename`.
void write_buffer(const std::string& filename, const std::vector<char>& buffer);
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_FILESYSTEM_HPP
#define MIGRAPHX_GUARD_RTGLIB_FILESYSTEM_HPP
#include <migraphx/config.hpp>
// Detect which filesystem header is usable. Both macros are always defined to
// 0 or 1; detection is skipped (both 0) when __has_include is unavailable or
// when CPPCHECK is defined.
#if defined(__has_include) && !defined(CPPCHECK)
// <filesystem> is only used when compiling as C++17 or newer.
#if __has_include(<filesystem>) && __cplusplus >= 201703L
#define MIGRAPHX_HAS_FILESYSTEM 1
#else
#define MIGRAPHX_HAS_FILESYSTEM 0
#endif
// Fallback: the <experimental/filesystem> header.
#if __has_include(<experimental/filesystem>) && __cplusplus >= 201103L
#define MIGRAPHX_HAS_FILESYSTEM_TS 1
#else
#define MIGRAPHX_HAS_FILESYSTEM_TS 0
#endif
#else
#define MIGRAPHX_HAS_FILESYSTEM 0
#define MIGRAPHX_HAS_FILESYSTEM_TS 0
#endif
// Prefer <filesystem> over the experimental header; fail the build if neither
// was detected.
#if MIGRAPHX_HAS_FILESYSTEM
#include <filesystem>
#elif MIGRAPHX_HAS_FILESYSTEM_TS
#include <experimental/filesystem>
#else
#error "No filesystem include available"
#endif
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
// migraphx::fs aliases whichever filesystem namespace was included above.
#if MIGRAPHX_HAS_FILESYSTEM
namespace fs = ::std::filesystem;
#elif MIGRAPHX_HAS_FILESYSTEM_TS
namespace fs = ::std::experimental::filesystem;
#endif
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
...@@ -141,9 +141,9 @@ struct raw_data : raw_data_base ...@@ -141,9 +141,9 @@ struct raw_data : raw_data_base
template <class T> template <class T>
T* cast() const T* cast() const
{ {
auto&& s = static_cast<const Derived&>(*this).get_shape();
auto&& buffer = static_cast<const Derived&>(*this).data(); auto&& buffer = static_cast<const Derived&>(*this).data();
assert(s.type() == migraphx::shape::get_type<T>{}); assert(static_cast<const Derived&>(*this).get_shape().type() ==
migraphx::shape::get_type<T>{});
return reinterpret_cast<T*>(buffer); return reinterpret_cast<T*>(buffer);
} }
}; };
......
...@@ -11,6 +11,9 @@ ...@@ -11,6 +11,9 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
#define MIGRAPHX_STRINGIZE_1(...) #__VA_ARGS__
#define MIGRAPHX_STRINGIZE(...) MIGRAPHX_STRINGIZE_1(__VA_ARGS__)
inline std::string inline std::string
replace_string(std::string subject, const std::string& search, const std::string& replace) replace_string(std::string subject, const std::string& search, const std::string& replace)
{ {
......
#ifndef MIGRAPHX_GUARD_RTGLIB_TMP_DIR_HPP
#define MIGRAPHX_GUARD_RTGLIB_TMP_DIR_HPP
#include <migraphx/config.hpp>
#include <migraphx/filesystem.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
// Non-copyable object holding a directory path.
// NOTE(review): the implementation (tmp_dir.cpp) is not visible here;
// presumably the constructor creates a fresh temporary directory and the
// destructor removes it — confirm against the source file.
struct tmp_dir
{
// Location of the directory.
fs::path path;
tmp_dir();
// Runs the program `exe` with the argument string `args`.
// NOTE(review): presumably executed with `path` as the working directory — confirm.
void execute(const std::string& exe, const std::string& args) const;
tmp_dir(tmp_dir const&) = delete;
tmp_dir& operator=(tmp_dir const&) = delete;
~tmp_dir();
};
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraphx/load_save.hpp> #include <migraphx/load_save.hpp>
#include <migraphx/file_buffer.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
#include <migraphx/msgpack.hpp> #include <migraphx/msgpack.hpp>
#include <fstream> #include <fstream>
...@@ -6,30 +7,6 @@ ...@@ -6,30 +7,6 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
std::vector<char> read_buffer(const std::string& filename)
{
std::ifstream is(filename, std::ios::binary | std::ios::ate);
std::streamsize size = is.tellg();
is.seekg(0, std::ios::beg);
std::vector<char> buffer(size);
if(!is.read(buffer.data(), size))
{
MIGRAPHX_THROW("Error reading file: " + filename);
}
return buffer;
}
void write_buffer(const std::string& filename, const char* buffer, std::size_t size)
{
std::ofstream os(filename);
os.write(buffer, size);
}
void write_buffer(const std::string& filename, const std::vector<char>& buffer)
{
write_buffer(filename, buffer.data(), buffer.size());
}
program load(const std::string& filename, const file_options& options) program load(const std::string& filename, const file_options& options)
{ {
return load_buffer(read_buffer(filename), options); return load_buffer(read_buffer(filename), options);
......
...@@ -113,6 +113,7 @@ add_library(migraphx_gpu ...@@ -113,6 +113,7 @@ add_library(migraphx_gpu
concat.cpp concat.cpp
leaky_relu.cpp leaky_relu.cpp
batch_norm_inference.cpp batch_norm_inference.cpp
kernel.cpp
write_literals.cpp write_literals.cpp
rocblas.cpp rocblas.cpp
abs.cpp abs.cpp
...@@ -131,6 +132,8 @@ add_library(migraphx_gpu ...@@ -131,6 +132,8 @@ add_library(migraphx_gpu
preallocate_param.cpp preallocate_param.cpp
rnn_variable_seq_lens.cpp rnn_variable_seq_lens.cpp
sync_device.cpp sync_device.cpp
pack_args.cpp
compile_hip.cpp
) )
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu) set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
function(register_migraphx_gpu_ops PREFIX) function(register_migraphx_gpu_ops PREFIX)
...@@ -217,6 +220,42 @@ register_op(migraphx_gpu ...@@ -217,6 +220,42 @@ register_op(migraphx_gpu
INCLUDES migraphx/gpu/context.hpp) INCLUDES migraphx/gpu/context.hpp)
rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION}) rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_gpu) rocm_clang_tidy_check(migraphx_gpu)
# look for offload bundler
get_filename_component(CMAKE_CXX_COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH)
# A compiler whose path ends in "clang++" is paired with clang-offload-bundler;
# any other compiler is paired with the extractkernel tool. Only one of the two
# variables is searched for, so the other one prints empty below.
if(CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+$")
find_program(MIGRAPHX_OFFLOADBUNDLER_BIN clang-offload-bundler
HINTS ${CMAKE_CXX_COMPILER_PATH}
PATH_SUFFIXES bin
PATHS /opt/rocm/llvm
)
else()
find_program(MIGRAPHX_EXTRACT_KERNEL extractkernel
PATH_SUFFIXES bin
HINTS ${CMAKE_CXX_COMPILER_PATH}
PATHS
/opt/rocm/hip
/opt/rocm/hcc
/opt/rocm
)
endif()
message(STATUS "clang-offload-bundler: ${MIGRAPHX_OFFLOADBUNDLER_BIN}")
message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
# Get flags needed to compile hip
include(TargetFlags)
target_flags(HIP_COMPILER_FLAGS hip::device)
# Remove cuda arch flags
string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
# Pass the tool paths and flags to the C++ sources as macros; compile_hip.cpp
# turns them into strings with MIGRAPHX_STRINGIZE.
target_compile_definitions(migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}"
"-DMIGRAPHX_HIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}"
"-DMIGRAPHX_OFFLOADBUNDLER_BIN=${MIGRAPHX_OFFLOADBUNDLER_BIN}"
"-DMIGRAPHX_EXTRACT_KERNEL=${MIGRAPHX_EXTRACT_KERNEL}"
)
# Workaround broken rocblas headers # Workaround broken rocblas headers
target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1) target_compile_definitions(migraphx_gpu PUBLIC -D__HIP_PLATFORM_HCC__=1)
target_compile_options(migraphx_gpu PRIVATE -std=c++17) target_compile_options(migraphx_gpu PRIVATE -std=c++17)
......
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/file_buffer.hpp>
#include <migraphx/tmp_dir.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/errors.hpp>
#include <cassert>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// Reports whether the HIP compiler path configured at build time ends in "hcc".
bool is_hcc_compiler()
{
    // Computed on the first call only; the compiler path is a build-time macro.
    static const auto uses_hcc = ends_with(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER), "hcc");
    return uses_hcc;
}
// Reports whether the HIP compiler path configured at build time ends in "clang++".
bool is_hip_clang_compiler()
{
    // Computed on the first call only; the compiler path is a build-time macro.
    static const auto uses_hip_clang =
        ends_with(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER), "clang++");
    return uses_hip_clang;
}
// Compile HIP sources with the compiler configured at build time and return
// the contents of every ".hsaco" file produced.
//
// srcs   - files to materialize in a temporary directory; those whose
//          extension is ".cpp" are passed to the compiler
// params - extra compiler arguments; "--std=c++17" is added when the string
//          contains no "-std="
// arch   - GPU architecture name inserted into the compiler/bundler arguments
//
// Throws via MIGRAPHX_THROW when the configured compiler is neither hcc nor
// hip-clang.
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
{
std::vector<std::vector<char>> hsacos;
if(not is_hcc_compiler() and not is_hip_clang_compiler())
MIGRAPHX_THROW("Unknown hip compiler: " +
std::string(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER)));
assert(not srcs.empty());
tmp_dir td{};
// Assemble the compiler command line.
if(params.find("-std=") == std::string::npos)
params += " --std=c++17";
params += " -fno-gpu-rdc";
params += " -c";
if(is_hcc_compiler())
{
params += " -amdgpu-target=" + arch;
}
else if(is_hip_clang_compiler())
{
params += " --cuda-gpu-arch=" + arch;
params += " --cuda-device-only";
params += " -O3 ";
}
params += " -Wno-unused-command-line-argument -I. ";
params += MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_FLAGS);
std::string output_flags{};
// Write every source below the temporary directory, creating subdirectories
// as needed.
for(const auto& src : srcs)
{
fs::path full_path = td.path / src.path;
fs::path parent_path = full_path.parent_path();
fs::create_directories(parent_path);
write_buffer(full_path.string(), src.content.first, src.len());
if(src.path.extension().string() == ".cpp")
{
// NOTE(review): only the file name (without directories) is passed to the
// compiler, and output_flags is overwritten on each iteration, so with
// several .cpp files only the last one determines "-o" — confirm this is
// intended.
params += " " + src.path.filename().string();
output_flags = " -o " + src.path.stem().string() + ".o";
}
}
params += output_flags;
td.execute(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER), params);
// Run the extraction tool on every regular ".o" file in the top level of the
// temporary directory.
for(const auto& entry : fs::directory_iterator{td.path})
{
const auto& obj_path = entry.path();
if(not fs::is_regular_file(obj_path))
continue;
if(obj_path.extension() != ".o")
continue;
if(is_hcc_compiler())
{
// call extract kernel
td.execute(MIGRAPHX_STRINGIZE(MIGRAPHX_EXTRACT_KERNEL), " -i " + obj_path.string());
}
if(is_hip_clang_compiler())
{
// call clang-offload-bundler
td.execute(MIGRAPHX_STRINGIZE(MIGRAPHX_OFFLOADBUNDLER_BIN),
"--type=o --targets=hip-amdgcn-amd-amdhsa-" + arch +
" --inputs=" + obj_path.string() + " --outputs=" + obj_path.string() +
".hsaco --unbundle");
}
}
// Collect the contents of every regular ".hsaco" file found afterwards.
for(const auto& entry : fs::directory_iterator{td.path})
{
const auto& obj_path = entry.path();
if(not fs::is_regular_file(obj_path))
continue;
if(obj_path.extension() != ".hsaco")
continue;
hsacos.push_back(read_buffer(obj_path.string()));
}
return hsacos;
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#ifndef MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
#define MIGRAPHX_GUARD_RTGLIB_COMPILE_HIP_HPP
#include <migraphx/config.hpp>
#include <migraphx/filesystem.hpp>
#include <string>
#include <utility>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// A source file to compile: where to place it and the bytes it contains.
struct src_file
{
    // Path of the file.
    fs::path path;
    // Half-open byte range [first, second) holding the file contents.
    std::pair<const char*, const char*> content;
    // Number of bytes in `content`.
    std::size_t len() const
    {
        const auto byte_count = content.second - content.first;
        return byte_count;
    }
};
// Compiles the given sources for the GPU architecture `arch`, using `params`
// as additional compiler arguments, and returns one byte buffer per produced
// code object.
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#define MIGRAPHX_GUARD_RTGLIB_KERNEL_HPP
#include <migraphx/config.hpp>
#include <migraphx/gpu/pack_args.hpp>
#include <hip/hip_runtime_api.h>
#include <memory>
#include <string>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct kernel_impl;
// Handle to a GPU function looked up by name inside a code object image.
// Copies share the same underlying state through a shared_ptr.
struct kernel
{
// Creates an empty handle.
// NOTE(review): impl stays null here and launch() dereferences it, so an
// empty handle should not be launched — confirm callers.
kernel() = default;
// Loads `image` as a module and looks up the function called `name`.
kernel(const std::vector<char>& image, const std::string& name);
// Launches the function on `stream`. `global` and `local` are forwarded as
// the first-dimension launch sizes; `args` are packed with pack_args.
void launch(hipStream_t stream,
std::size_t global,
std::size_t local,
const std::vector<kernel_argument>& args);
// Returns a callable that launches with the given stream and sizes, taking
// the kernel arguments as its call arguments.
// NOTE(review): the lambda captures `this`, so the returned callable should
// not outlive this kernel object.
auto launch(hipStream_t stream, std::size_t global, std::size_t local)
{
return [=](auto&&... xs) { launch(stream, global, local, {xs...}); };
}
private:
std::shared_ptr<kernel_impl> impl;
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#define MIGRAPHX_GUARD_RTGLIB_PACK_ARGS_HPP
#include <migraphx/config.hpp>
#include <migraphx/requires.hpp>
#include <utility>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// Type-erased view of one kernel argument: the address of the caller's object
// together with its size and alignment. It does not own the object, so the
// referenced object must stay alive while the kernel_argument is used.
struct kernel_argument
{
    // Wraps any object that is not itself a kernel_argument. The constraint is
    // checked on U (T with the reference removed): T is a reference type for
    // lvalues, and std::is_base_of is false for reference types, which would
    // let this constructor be chosen instead of the copy constructor when
    // copying a non-const kernel_argument.
    template <class T,
              class U = std::remove_reference_t<T>,
              MIGRAPHX_REQUIRES(not std::is_base_of<kernel_argument, U>{})>
    kernel_argument(T&& x) : size(sizeof(U)), align(alignof(U)), data(&x) // NOLINT
    {
    }
    // sizeof the wrapped object.
    std::size_t size;
    // alignof the wrapped object.
    std::size_t align;
    // Address of the wrapped object.
    void* data;
};
// Copies the bytes of every argument into one buffer, placing each argument
// at an offset that is a multiple of its alignment.
std::vector<char> pack_args(const std::vector<kernel_argument>& args);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
#include <migraphx/gpu/kernel.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/gpu/pack_args.hpp>
// extern declare the function since hip/hip_ext.h header is broken
// NOTE(review): parameter meanings are not visible here; the call in
// kernel::launch passes the function, six launch-size values, a zero, the
// stream, a null pointer and the launch configuration array, in that order.
extern hipError_t hipExtModuleLaunchKernel(hipFunction_t, // NOLINT
uint32_t,
uint32_t,
uint32_t,
uint32_t,
uint32_t,
uint32_t,
size_t,
hipStream_t,
void**,
void**,
hipEvent_t = nullptr,
hipEvent_t = nullptr,
uint32_t = 0);
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
extern std::string hip_error(int error);
using hip_module_ptr = MIGRAPHX_MANAGE_PTR(hipModule_t, hipModuleUnload);
// State shared by all copies of a kernel handle.
struct kernel_impl
{
// Owning handle of the loaded module (released with hipModuleUnload).
hip_module_ptr module = nullptr;
// Function looked up in `module`; filled in by the kernel constructor.
hipFunction_t fun = nullptr;
};
// Load a code object image as a HIP module and return an owning handle to it.
// Throws via MIGRAPHX_THROW when hipModuleLoadData reports an error.
hip_module_ptr load_module(const std::vector<char>& image)
{
    // Start from null: if loading fails without writing the handle, the owning
    // pointer below must not hand an indeterminate value to hipModuleUnload
    // while the exception unwinds.
    hipModule_t raw_m = nullptr;
    auto status       = hipModuleLoadData(&raw_m, image.data());
    hip_module_ptr m{raw_m};
    if(status != hipSuccess)
        MIGRAPHX_THROW("Failed to load module: " + hip_error(status));
    return m;
}
// Load `image` as a module and resolve the function called `name` in it.
// Throws via MIGRAPHX_THROW when the module cannot be loaded or the function
// cannot be found.
kernel::kernel(const std::vector<char>& image, const std::string& name)
    : impl(std::make_shared<kernel_impl>())
{
    impl->module = load_module(image);
    const auto lookup_status =
        hipModuleGetFunction(&impl->fun, impl->module.get(), name.c_str());
    if(lookup_status == hipSuccess)
        return;
    MIGRAPHX_THROW("Failed to get function: " + name + ": " + hip_error(lookup_status));
}
// Pack `args` into one buffer and launch the function on `stream`.
// `global` and `local` are passed as the first of each group of three launch
// sizes; the remaining sizes are 1.
// Throws via MIGRAPHX_THROW when the launch call reports an error.
void kernel::launch(hipStream_t stream,
std::size_t global,
std::size_t local,
const std::vector<kernel_argument>& args)
{
std::vector<char> kernargs = pack_args(args);
std::size_t size = kernargs.size();
// Launch configuration: the packed argument buffer and its size, each
// preceded by a marker value and terminated by an end marker.
void* config[] = {
// HIP_LAUNCH_PARAM_* are macros that do horrible things
#ifdef MIGRAPHX_USE_CLANG_TIDY
nullptr, kernargs.data(), nullptr, &size, nullptr
#else
HIP_LAUNCH_PARAM_BUFFER_POINTER,
kernargs.data(),
HIP_LAUNCH_PARAM_BUFFER_SIZE,
&size,
HIP_LAUNCH_PARAM_END
#endif
};
// NOTE(review): global and local are std::size_t but the declared parameters
// are uint32_t, so larger values are narrowed — confirm the expected range.
auto status = hipExtModuleLaunchKernel(impl->fun,
global,
1,
1,
local,
1,
1,
0,
stream,
nullptr,
reinterpret_cast<void**>(&config));
if(status != hipSuccess)
MIGRAPHX_THROW("Failed to launch kernel: " + hip_error(status));
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/pack_args.hpp>
#include <migraphx/requires.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
// Serialize the arguments back to back into one byte buffer. Before each
// argument, zero bytes are added until the current length is a multiple of
// that argument's alignment; then the argument's bytes are copied in.
std::vector<char> pack_args(const std::vector<kernel_argument>& args)
{
    std::vector<char> packed;
    for(const auto& argument : args)
    {
        // Zero-fill up to the next offset that satisfies the alignment.
        const std::size_t misalignment = packed.size() % argument.align;
        if(misalignment != 0)
            packed.insert(packed.end(), argument.align - misalignment, 0);

        const auto* first = static_cast<const char*>(argument.data);
        packed.insert(packed.end(), first, first + argument.size);
    }
    return packed;
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment