Unverified Commit 91cc7242 authored by Umang Yadav's avatar Umang Yadav Committed by GitHub
Browse files

hipRTC fixes (#1531)

Added CMakeFlag for hipRTC. MIGRAPHX_USE_HIPRTC.
Added stages in Jenkins for hipRTC.
Fixes for some of the pending issues from hipRTC.
parent a4b82653
...@@ -15,11 +15,13 @@ def rocmtestnode(Map conf) { ...@@ -15,11 +15,13 @@ def rocmtestnode(Map conf) {
def compiler = bconf.get("compiler", "/opt/rocm/llvm/bin/clang++") def compiler = bconf.get("compiler", "/opt/rocm/llvm/bin/clang++")
def flags = bconf.get("flags", "") def flags = bconf.get("flags", "")
def gpu_debug = bconf.get("gpu_debug", "0") def gpu_debug = bconf.get("gpu_debug", "0")
def hiprtc_workarounds = bconf.get("hiprtc_workarounds", "0")
def cmd = """ def cmd = """
ulimit -c unlimited ulimit -c unlimited
echo "leak:dnnl::impl::malloc" > suppressions.txt echo "leak:dnnl::impl::malloc" > suppressions.txt
export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt" export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
export MIGRAPHX_GPU_DEBUG=${gpu_debug} export MIGRAPHX_GPU_DEBUG=${gpu_debug}
export MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=${hiprtc_workarounds}
export CXX=${compiler} export CXX=${compiler}
export CXXFLAGS='-Werror' export CXXFLAGS='-Werror'
env env
...@@ -110,6 +112,10 @@ rocmtest clang_debug: rocmnode('vega') { cmake_build -> ...@@ -110,6 +112,10 @@ rocmtest clang_debug: rocmnode('vega') { cmake_build ->
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release") cmake_build(flags: "-DCMAKE_BUILD_TYPE=release")
stash includes: 'build/*.deb', name: 'migraphx-package' stash includes: 'build/*.deb', name: 'migraphx-package'
} }
}, hiprtc_gpu_debug: rocmnode('vega') { cmake_build ->
stage('HipRTC GPU Debug') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true, hiprtc_workarounds: true)
}
}, mlir_debug: rocmnode('vega') { cmake_build -> }, mlir_debug: rocmnode('vega') { cmake_build ->
stage('MLIR Debug') { stage('MLIR Debug') {
def sanitizers = "undefined" def sanitizers = "undefined"
......
##################################################################################### # ####################################################################################
# The MIT License (MIT) # The MIT License (MIT)
# #
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. # Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE. # THE SOFTWARE.
##################################################################################### # ####################################################################################
list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc) list(APPEND CMAKE_PREFIX_PATH /opt/rocm /opt/rocm/hip /opt/rocm/hcc)
find_package(miopen) find_package(miopen)
...@@ -33,6 +33,8 @@ if(NOT TARGET MIOpen) ...@@ -33,6 +33,8 @@ if(NOT TARGET MIOpen)
message(SEND_ERROR "Cant find miopen") message(SEND_ERROR "Cant find miopen")
endif() endif()
set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "Use hipRTC APIs")
include(Embed) include(Embed)
file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS} file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp) ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
...@@ -46,9 +48,10 @@ add_library(compile_for_gpu INTERFACE) ...@@ -46,9 +48,10 @@ add_library(compile_for_gpu INTERFACE)
target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns) target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored) target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)
check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE) check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
if(HAS_HIP_LAMBDA_HOST_DEVICE) if(HAS_HIP_LAMBDA_HOST_DEVICE)
message(STATUS "Enable -fhip-lambda-host-device") message(STATUS "Enable -fhip-lambda-host-device")
target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device) target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device)
endif() endif()
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device) set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
...@@ -60,11 +63,13 @@ target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURR ...@@ -60,11 +63,13 @@ target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURR
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>) target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(kernel_file_check EXCLUDE_FROM_ALL) add_library(kernel_file_check EXCLUDE_FROM_ALL)
foreach(KERNEL_FILE ${KERNEL_FILES}) foreach(KERNEL_FILE ${KERNEL_FILES})
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE) get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n") file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp) target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
endforeach() endforeach()
target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256) target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256)
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>) target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
target_link_libraries(kernel_file_check compile_for_gpu) target_link_libraries(kernel_file_check compile_for_gpu)
...@@ -125,6 +130,7 @@ function(register_migraphx_gpu_ops PREFIX) ...@@ -125,6 +130,7 @@ function(register_migraphx_gpu_ops PREFIX)
register_op(migraphx_gpu HEADER migraphx/gpu/${OP}.hpp OPERATORS gpu::${PREFIX}${OP} INCLUDES migraphx/gpu/context.hpp) register_op(migraphx_gpu HEADER migraphx/gpu/${OP}.hpp OPERATORS gpu::${PREFIX}${OP} INCLUDES migraphx/gpu/context.hpp)
endforeach() endforeach()
endfunction() endfunction()
register_migraphx_gpu_ops(hip_ register_migraphx_gpu_ops(hip_
argmax argmax
argmin argmin
...@@ -146,47 +152,41 @@ register_migraphx_gpu_ops(miopen_ ...@@ -146,47 +152,41 @@ register_migraphx_gpu_ops(miopen_
lrn lrn
pooling pooling
) )
register_op(migraphx_gpu register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
INCLUDES migraphx/gpu/context.hpp) INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu register_op(migraphx_gpu
HEADER migraphx/gpu/int8_gemm_pack.hpp HEADER migraphx/gpu/int8_gemm_pack.hpp
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
INCLUDES migraphx/gpu/context.hpp) INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot> OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
INCLUDES migraphx/gpu/context.hpp) INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp register_op(migraphx_gpu HEADER migraphx/gpu/convolution.hpp
OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution> OPERATORS gpu::miopen_convolution<op::convolution> gpu::miopen_convolution<op::deconvolution> gpu::miopen_convolution<op::quant_convolution>
INCLUDES migraphx/gpu/context.hpp) INCLUDES migraphx/gpu/context.hpp)
rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION}) rocm_set_soversion(migraphx_gpu ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_gpu) rocm_clang_tidy_check(migraphx_gpu)
# look for offload bundler
get_filename_component(CMAKE_CXX_COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH) get_filename_component(CMAKE_CXX_COMPILER_PATH "${CMAKE_CXX_COMPILER}" PATH)
if(CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+$")
find_program(MIGRAPHX_OFFLOADBUNDLER_BIN clang-offload-bundler if(NOT CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+$")
HINTS ${CMAKE_CXX_COMPILER_PATH}
PATH_SUFFIXES bin
PATHS /opt/rocm/llvm
)
else()
find_program(MIGRAPHX_EXTRACT_KERNEL extractkernel find_program(MIGRAPHX_EXTRACT_KERNEL extractkernel
PATH_SUFFIXES bin PATH_SUFFIXES bin
HINTS ${CMAKE_CXX_COMPILER_PATH} HINTS ${CMAKE_CXX_COMPILER_PATH}
PATHS PATHS
/opt/rocm/hip /opt/rocm/hip
/opt/rocm/hcc /opt/rocm/hcc
/opt/rocm /opt/rocm
) )
endif() endif()
message(STATUS "clang-offload-bundler: ${MIGRAPHX_OFFLOADBUNDLER_BIN}")
message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}") message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "") set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "")
if(MIGRAPHX_ENABLE_MLIR) if(MIGRAPHX_ENABLE_MLIR)
# Find package rocMLIR # Find package rocMLIR
find_package(rocMLIR 1.0.0 CONFIG REQUIRED) find_package(rocMLIR 1.0.0 CONFIG REQUIRED)
...@@ -195,36 +195,39 @@ if(MIGRAPHX_ENABLE_MLIR) ...@@ -195,36 +195,39 @@ if(MIGRAPHX_ENABLE_MLIR)
target_link_libraries(migraphx_gpu PUBLIC rocMLIR::rockCompiler) target_link_libraries(migraphx_gpu PUBLIC rocMLIR::rockCompiler)
endif() endif()
set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "")
if(MIGRAPHX_USE_HIPRTC) if(MIGRAPHX_USE_HIPRTC)
target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1) message(STATUS "MIGraphX is using hipRTC")
target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1)
else() else()
# Get flags needed to compile hip message(STATUS "MIGraphX is using HIP Clang")
include(TargetFlags)
target_flags(HIP_COMPILER_FLAGS hip::device)
# Remove cuda arch flags
string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REGEX REPLACE --offload-arch=[a-z0-9:+-]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
# Skip library paths since hip will incorrectly treat it as a source file
string(APPEND HIP_COMPILER_FLAGS " ")
foreach(_unused RANGE 2)
string(REGEX REPLACE " /[^ ]+\\.(a|so) " " " HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
endforeach()
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}") # Get flags needed to compile hip
target_compile_definitions(migraphx_gpu PRIVATE include(TargetFlags)
"-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}" target_flags(HIP_COMPILER_FLAGS hip::device)
"-DMIGRAPHX_HIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}"
"-DMIGRAPHX_OFFLOADBUNDLER_BIN=${MIGRAPHX_OFFLOADBUNDLER_BIN}" # Remove cuda arch flags
"-DMIGRAPHX_EXTRACT_KERNEL=${MIGRAPHX_EXTRACT_KERNEL}" string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
"-DMIGRAPHX_USE_HIPRTC=0" string(REGEX REPLACE --offload-arch=[a-z0-9:+-]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
)
if(DEFINED CMAKE_CXX_COMPILER_LAUNCHER) # Skip library paths since hip will incorrectly treat it as a source file
execute_process(COMMAND which ${CMAKE_CXX_COMPILER_LAUNCHER} OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER) string(APPEND HIP_COMPILER_FLAGS " ")
string(STRIP "${MIGRAPHX_HIP_COMPILER_LAUNCHER}" MIGRAPHX_HIP_COMPILER_LAUNCHER)
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_HIP_COMPILER_LAUNCHER=${MIGRAPHX_HIP_COMPILER_LAUNCHER}")
endif()
foreach(_unused RANGE 2)
string(REGEX REPLACE " /[^ ]+\\.(a|so) " " " HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
endforeach()
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
target_compile_definitions(migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}"
"-DMIGRAPHX_HIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}"
"-DMIGRAPHX_EXTRACT_KERNEL=${MIGRAPHX_EXTRACT_KERNEL}"
)
if(DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
execute_process(COMMAND which ${CMAKE_CXX_COMPILER_LAUNCHER} OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER)
string(STRIP "${MIGRAPHX_HIP_COMPILER_LAUNCHER}" MIGRAPHX_HIP_COMPILER_LAUNCHER)
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_HIP_COMPILER_LAUNCHER=${MIGRAPHX_HIP_COMPILER_LAUNCHER}")
endif()
endif() endif()
# Check miopen find mode api # Check miopen find mode api
...@@ -236,7 +239,7 @@ check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_ ...@@ -236,7 +239,7 @@ check_library_exists(MIOpen "miopenFindSolutions" "${MIOPEN_LOCATION}" HAS_FIND_
# TODO: Set default to HAS_FIND_2_API # TODO: Set default to HAS_FIND_2_API
set(MIGRAPHX_USE_FIND_2_API OFF CACHE BOOL "") set(MIGRAPHX_USE_FIND_2_API OFF CACHE BOOL "")
if(MIGRAPHX_USE_FIND_2_API) if(MIGRAPHX_USE_FIND_2_API)
target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API) target_compile_definitions(migraphx_gpu PUBLIC -DMIGRAPHX_HAS_FIND_2_API)
message(STATUS "MIGraphx is using Find-2.0 API of MIOpen") message(STATUS "MIGraphx is using Find-2.0 API of MIOpen")
else() else()
...@@ -258,8 +261,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels) ...@@ -258,8 +261,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver) add_subdirectory(driver)
rocm_install_targets( rocm_install_targets(
TARGETS migraphx_gpu migraphx_device compile_for_gpu TARGETS migraphx_gpu migraphx_device compile_for_gpu
INCLUDE INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include
) )
...@@ -29,10 +29,9 @@ ...@@ -29,10 +29,9 @@
#include <cassert> #include <cassert>
#include <iostream> #include <iostream>
#if MIGRAPHX_USE_HIPRTC #ifdef MIGRAPHX_USE_HIPRTC
#include <hip/hiprtc.h> #include <hip/hiprtc.h>
#include <migraphx/manage_ptr.hpp> #include <migraphx/manage_ptr.hpp>
#include <migraphx/env.hpp>
#else #else
#include <migraphx/compile_src.hpp> #include <migraphx/compile_src.hpp>
#include <migraphx/process.hpp> #include <migraphx/process.hpp>
...@@ -48,9 +47,10 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE); ...@@ -48,9 +47,10 @@ MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM); MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC); MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);
#if MIGRAPHX_USE_HIPRTC #ifdef MIGRAPHX_USE_HIPRTC
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC) MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS);
std::string hiprtc_error(hiprtcResult err, const std::string& msg) std::string hiprtc_error(hiprtcResult err, const std::string& msg)
{ {
...@@ -143,25 +143,29 @@ struct hiprtc_program ...@@ -143,25 +143,29 @@ struct hiprtc_program
options.end(), options.end(),
std::back_inserter(c_options), std::back_inserter(c_options),
[](const std::string& s) { return s.c_str(); }); [](const std::string& s) { return s.c_str(); });
auto result = hiprtcCompileProgram(prog.get(), c_options.size(), c_options.data()); auto result = hiprtcCompileProgram(prog.get(), c_options.size(), c_options.data());
std::cerr << log() << std::endl; auto prog_log = log();
if(not prog_log.empty())
{
std::cerr << prog_log << std::endl;
}
if(result != HIPRTC_SUCCESS) if(result != HIPRTC_SUCCESS)
MIGRAPHX_HIPRTC_THROW(result, "Compilation failed."); MIGRAPHX_HIPRTC_THROW(result, "Compilation failed.");
} }
std::string log() std::string log() const
{ {
std::size_t n = 0; std::size_t n = 0;
MIGRAPHX_HIPRTC(hiprtcGetProgramLogSize(prog.get(), &n)); MIGRAPHX_HIPRTC(hiprtcGetProgramLogSize(prog.get(), &n));
if(n < 2) if(n == 0)
return {}; return {};
std::vector<char> buffer(n); std::string buffer(n, '\0');
MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data())); MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data()));
assert(buffer.back() == 0); assert(buffer.back() != 0);
return {buffer.begin(), buffer.end() - 1}; return buffer;
} }
std::vector<char> get_code_obj() std::vector<char> get_code_obj() const
{ {
std::size_t n = 0; std::size_t n = 0;
MIGRAPHX_HIPRTC(hiprtcGetCodeSize(prog.get(), &n)); MIGRAPHX_HIPRTC(hiprtcGetCodeSize(prog.get(), &n));
...@@ -176,6 +180,17 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std ...@@ -176,6 +180,17 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
{ {
hiprtc_program prog(srcs); hiprtc_program prog(srcs);
auto options = split_string(params, ' '); auto options = split_string(params, ' ');
options.push_back("-DMIGRAPHX_USE_HIPRTC=1");
// remove following three compilation flags for HIPRTC once fixes from hipRTC are available in
if(enabled(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS{}))
{
options.push_back("-DMIGRAPHX_HAS_DPP=0");
options.push_back("-DMIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=1");
options.push_back("-Wno-reserved-identifier");
options.push_back("-Wno-gnu-line-marker");
options.push_back("-Wno-old-style-cast");
}
if(enabled(MIGRAPHX_GPU_DEBUG{})) if(enabled(MIGRAPHX_GPU_DEBUG{}))
options.push_back("-DMIGRAPHX_DEBUG"); options.push_back("-DMIGRAPHX_DEBUG");
if(std::none_of(options.begin(), options.end(), [](const std::string& s) { if(std::none_of(options.begin(), options.end(), [](const std::string& s) {
...@@ -183,7 +198,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std ...@@ -183,7 +198,7 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
})) }))
options.push_back("-std=c++17"); options.push_back("-std=c++17");
options.push_back("-fno-gpu-rdc"); options.push_back("-fno-gpu-rdc");
options.push_back(" -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3")); options.push_back("-O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3"));
options.push_back("-Wno-cuda-compat"); options.push_back("-Wno-cuda-compat");
options.push_back("--offload-arch=" + arch); options.push_back("--offload-arch=" + arch);
prog.compile(options); prog.compile(options);
...@@ -292,6 +307,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std ...@@ -292,6 +307,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
return {compiler.compile(srcs)}; return {compiler.compile(srcs)};
} }
#endif // MIGRAPHX_USE_HIPRTC
std::string enum_params(std::size_t count, std::string param) std::string enum_params(std::size_t count, std::string param)
{ {
std::vector<std::string> items(count); std::vector<std::string> items(count);
...@@ -299,8 +316,6 @@ std::string enum_params(std::size_t count, std::string param) ...@@ -299,8 +316,6 @@ std::string enum_params(std::size_t count, std::string param)
return join_strings(items, ","); return join_strings(items, ",");
} }
#endif // MIGRAPHX_USE_HIPRTC
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
...@@ -29,7 +29,6 @@ ...@@ -29,7 +29,6 @@
#include <migraphx/context.hpp> #include <migraphx/context.hpp>
#include <migraphx_kernels.hpp> #include <migraphx_kernels.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -80,6 +79,7 @@ std::string generate_args_hpp(const std::vector<shape>& inputs) ...@@ -80,6 +79,7 @@ std::string generate_args_hpp(const std::vector<shape>& inputs)
#include <migraphx/kernels/args.hpp> #include <migraphx/kernels/args.hpp>
#include <migraphx/kernels/tensor_view.hpp> #include <migraphx/kernels/tensor_view.hpp>
#include <migraphx/kernels/types.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -36,6 +36,7 @@ namespace gpu { ...@@ -36,6 +36,7 @@ namespace gpu {
namespace device { namespace device {
#ifdef MIGRAPHX_NO_DPP #ifdef MIGRAPHX_NO_DPP
template <index_int N, template <index_int N,
class Op, class Op,
class T, class T,
...@@ -62,6 +63,7 @@ __device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f) ...@@ -62,6 +63,7 @@ __device__ auto block_reduce(index idx, Op op, T init, ForStride fs, F f)
} }
return buffer[0]; return buffer[0];
} }
#else #else
constexpr unsigned int dpp_row_shr(unsigned int x) { return 0x110u | x; } constexpr unsigned int dpp_row_shr(unsigned int x) { return 0x110u | x; }
...@@ -96,11 +98,7 @@ __device__ T dpp_mov(T& x) ...@@ -96,11 +98,7 @@ __device__ T dpp_mov(T& x)
input.data = x; input.data = x;
for(index_int i = 0; i < n; i++) for(index_int i = 0; i < n; i++)
{ {
#if defined(__HCC__)
output.reg[i] = __llvm_amdgcn_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl);
#else
output.reg[i] = __hip_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl); output.reg[i] = __hip_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl);
#endif
} }
return output.data; return output.data;
} }
...@@ -310,4 +308,4 @@ void reduce(hipStream_t stream, ...@@ -310,4 +308,4 @@ void reduce(hipStream_t stream,
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
#endif #endif // MIGRAPHX_NO_DPP
...@@ -105,7 +105,7 @@ constexpr auto array_for_each(T& x, Ts&... xs) ...@@ -105,7 +105,7 @@ constexpr auto array_for_each(T& x, Ts&... xs)
} }
else else
{ {
using vec_type = std::remove_reference_t<decltype(array2vec(x))>; using vec_type = remove_reference_t<decltype(array2vec(x))>;
f(array2vec(x), __builtin_convertvector(array2vec(xs), vec_type)...); f(array2vec(x), __builtin_convertvector(array2vec(xs), vec_type)...);
} }
} }
......
...@@ -72,7 +72,7 @@ __device__ T dpp_mov(T& x) ...@@ -72,7 +72,7 @@ __device__ T dpp_mov(T& x)
} }
return output.data; return output.data;
} }
#endif #endif // MIGRAPHX_HAS_DPP
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_DPP_HPP #endif // MIGRAPHX_GUARD_KERNELS_DPP_HPP
...@@ -26,7 +26,7 @@ ...@@ -26,7 +26,7 @@
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/algorithm.hpp> #include <migraphx/kernels/algorithm.hpp>
#include <migraphx/kernels/ops.hpp>
namespace migraphx { namespace migraphx {
template <class T> template <class T>
...@@ -53,23 +53,17 @@ __device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t, ...@@ -53,23 +53,17 @@ __device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t,
auto indices_shape_lens = indices_shape.lens; auto indices_shape_lens = indices_shape.lens;
auto data_shape_lens = data_shape.lens; auto data_shape_lens = data_shape.lens;
auto num_slice_dims = indices_shape_lens.back(); auto num_slice_dims = indices_shape_lens.back();
std::size_t num_slices = accumulate(indices_shape_lens.begin(), std::size_t num_slices =
indices_shape_lens.end() - 1, accumulate(indices_shape_lens.begin(), indices_shape_lens.end() - 1, 1, op::product{});
1, std::size_t slice_size = accumulate(data_shape_lens.begin() + num_slice_dims + batch_dims,
std::multiplies<std::size_t>());
std::size_t slice_size = accumulate(data_shape_lens.begin() + num_slice_dims + batch_dims,
data_shape_lens.end(), data_shape_lens.end(),
1, 1,
std::multiplies<std::size_t>()); op::product{});
const std::size_t num_batches = accumulate(data_shape_lens.begin(), const std::size_t num_batches =
data_shape_lens.begin() + batch_dims, accumulate(data_shape_lens.begin(), data_shape_lens.begin() + batch_dims, 1, op::product{});
1, const std::size_t data_batch_stride =
std::multiplies<std::size_t>()); accumulate(data_shape_lens.begin() + batch_dims, data_shape_lens.end(), 1, op::product{});
const std::size_t data_batch_stride = accumulate(data_shape_lens.begin() + batch_dims, const auto num_slices_per_batch = num_slices / num_batches;
data_shape_lens.end(),
1,
std::multiplies<std::size_t>());
const auto num_slices_per_batch = num_slices / num_batches;
ind.global_stride(output_shape.elements(), [&](auto i) { ind.global_stride(output_shape.elements(), [&](auto i) {
const auto* indices_ptr = indices_t.data(); const auto* indices_ptr = indices_t.data();
...@@ -83,15 +77,15 @@ __device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t, ...@@ -83,15 +77,15 @@ __device__ void gathernd(const T& data_t, const U& indices_t, const V& output_t,
int64_t index = slice_indices[idx]; int64_t index = slice_indices[idx];
const std::size_t input_dim_idx = batch_dims + idx; const std::size_t input_dim_idx = batch_dims + idx;
const auto input_dim = data_shape_lens[input_dim_idx]; const auto input_dim = data_shape_lens[input_dim_idx];
assert(index >= -static_cast<int64_t>(input_dim) and MIGRAPHX_ASSERT(index >= -static_cast<int64_t>(input_dim) and
index < static_cast<int64_t>(input_dim)); index < static_cast<int64_t>(input_dim));
if(index < 0) if(index < 0)
index += input_dim; index += input_dim;
std::size_t size_from_slice_dims = std::size_t size_from_slice_dims =
accumulate(data_shape_lens.begin() + batch_dims + idx + 1, accumulate(data_shape_lens.begin() + batch_dims + idx + 1,
data_shape_lens.begin() + batch_dims + num_slice_dims, data_shape_lens.begin() + batch_dims + num_slice_dims,
slice_size, slice_size,
std::multiplies<std::size_t>()); op::product{});
relative_slice_offset += index * size_from_slice_dims; relative_slice_offset += index * size_from_slice_dims;
} }
......
...@@ -24,11 +24,18 @@ ...@@ -24,11 +24,18 @@
#ifndef MIGRAPHX_GUARD_KERNELS_HIP_HPP #ifndef MIGRAPHX_GUARD_KERNELS_HIP_HPP
#define MIGRAPHX_GUARD_KERNELS_HIP_HPP #define MIGRAPHX_GUARD_KERNELS_HIP_HPP
#ifndef MIGRAPHX_USE_HIPRTC
// Workaround macro redefinition issue with clang tidy // Workaround macro redefinition issue with clang tidy
#if defined(__HIP_PLATFORM_HCC__) && defined(MIGRAPHX_USE_CLANG_TIDY) #if defined(__HIP_PLATFORM_HCC__) && defined(MIGRAPHX_USE_CLANG_TIDY)
#undef __HIP_PLATFORM_HCC__ // NOLINT #undef __HIP_PLATFORM_HCC__ // NOLINT
#endif #endif
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
#include <hip/math_functions.h>
#include <hip/hip_math_constants.h>
#elif defined(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS)
#include <hip/hip_common.h>
#include <hip/hip_math_constants.h>
#endif
#endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP #endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP
...@@ -28,8 +28,7 @@ ...@@ -28,8 +28,7 @@
#include <migraphx/kernels/vec.hpp> #include <migraphx/kernels/vec.hpp>
#include <migraphx/kernels/functional.hpp> #include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/type_traits.hpp> #include <migraphx/kernels/type_traits.hpp>
#include <hip/hip_fp16.h> #include <migraphx/kernels/hip.hpp>
#include <hip/math_functions.h>
namespace migraphx { namespace migraphx {
...@@ -222,7 +221,7 @@ constexpr auto min(const T& a, const U& b) ...@@ -222,7 +221,7 @@ constexpr auto min(const T& a, const U& b)
template <class T, MIGRAPHX_REQUIRES(is_same<vec_type<T>, half>{})> template <class T, MIGRAPHX_REQUIRES(is_same<vec_type<T>, half>{})>
constexpr T sin(T x) constexpr T sin(T x)
{ {
constexpr const T shift = M_PI_2; constexpr const T shift = HIP_PIO2_F;
return migraphx::cos(shift - x); return migraphx::cos(shift - x);
} }
......
...@@ -76,14 +76,6 @@ struct shape ...@@ -76,14 +76,6 @@ struct shape
constexpr index_int index(index_array x) const { return x.dot(strides); } constexpr index_int index(index_array x) const { return x.dot(strides); }
constexpr index_int index(std::initializer_list<index_int> x) const
{
index_int idx = 0;
for(index_int i = 0; i < x.size(); i++)
idx += *(x.begin() + i) * strides[i];
return idx;
}
constexpr index_int index(index_int i) const constexpr index_int index(index_int i) const
{ {
if(this->standard()) if(this->standard())
......
...@@ -28,8 +28,45 @@ ...@@ -28,8 +28,45 @@
namespace migraphx { namespace migraphx {
using index_int = std::uint32_t; #if defined(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS) and defined(MIGRAPHX_USE_HIPRTC)
using diff_int = std::int32_t; using int8_t = signed char;
using uint8_t = unsigned char;
using int16_t = signed short;
using uint16_t = unsigned short;
using int32_t = signed int;
using uint32_t = unsigned int;
using int64_t = signed long long;
using uint64_t = unsigned long long;
#elif defined(MIGRAPHX_USE_HIPRTC)
using int8_t = __hip_int8_t;
using uint8_t = __hip_uint8_t;
using int16_t = __hip_int16_t;
using uint16_t = __hip_uint16_t;
using int32_t = __hip_int32_t;
using uint32_t = __hip_uint32_t;
using int64_t = __hip_int64_t;
using uint64_t = __hip_uint64_t;
#else
using int8_t = std::int8_t;
using uint8_t = std::uint8_t;
using int16_t = std::int16_t;
using uint16_t = std::uint16_t;
using int32_t = std::int32_t;
using uint32_t = std::uint32_t;
using int64_t = std::int64_t;
using uint64_t = std::uint64_t;
#endif // MIGRAPHX_USE_HIPRTC
using index_int = uint32_t;
using diff_int = int32_t;
static_assert(sizeof(int8_t) == 1, "int8_t must be 1 bytes");
static_assert(sizeof(uint8_t) == 1, "uint8_t must be 1 bytes");
static_assert(sizeof(int16_t) == 2, "int16_t must be 2 bytes");
static_assert(sizeof(uint16_t) == 2, "uint16_t must be 2 bytes");
static_assert(sizeof(int32_t) == 4, "int32_t must be 4 bytes");
static_assert(sizeof(uint32_t) == 4, "uint32_t must be 4 bytes");
static_assert(sizeof(int64_t) == 8, "int64_t must be 8 bytes");
static_assert(sizeof(uint64_t) == 8, "uint64_t must be 8 bytes");
#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT #define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT
......
##################################################################################### # ####################################################################################
# The MIT License (MIT) # The MIT License (MIT)
# #
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. # Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
...@@ -20,7 +20,7 @@ ...@@ -20,7 +20,7 @@
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE. # THE SOFTWARE.
##################################################################################### # ####################################################################################
cmake_policy(SET CMP0057 NEW) cmake_policy(SET CMP0057 NEW)
...@@ -49,27 +49,31 @@ function(add_test_command NAME EXE) ...@@ -49,27 +49,31 @@ function(add_test_command NAME EXE)
set_tests_properties(${NAME} PROPERTIES DISABLED On) set_tests_properties(${NAME} PROPERTIES DISABLED On)
elseif(WIN32) elseif(WIN32)
set(WINPATH) set(WINPATH)
foreach(PATH ${CMAKE_FIND_ROOT_PATH}) foreach(PATH ${CMAKE_FIND_ROOT_PATH})
list(APPEND WINPATH ${PATH}/bin) list(APPEND WINPATH ${PATH}/bin)
endforeach() endforeach()
file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd"
CONTENT "set PATH=${WINPATH};%PATH% CONTENT "set PATH=${WINPATH};%PATH%
%1 ${ARGN}") %1 ${ARGN}")
add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>) add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
else() else()
if(MIGRAPHX_TEST_GDB) if(MIGRAPHX_TEST_GDB)
# add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB} # add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB}
# --batch # --batch
# --return-child-result # --return-child-result
# -ex "set disable-randomization off" # -ex "set disable-randomization off"
# -ex run # -ex run
# -ex backtrace # -ex backtrace
# --args $<TARGET_FILE:${EXE}> ${ARGN}) # --args $<TARGET_FILE:${EXE}> ${ARGN})
set(TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/gdb/test_${NAME}) set(TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/gdb/test_${NAME})
file(MAKE_DIRECTORY ${TEST_DIR}) file(MAKE_DIRECTORY ${TEST_DIR})
if (NOT EXISTS ${TEST_DIR})
if(NOT EXISTS ${TEST_DIR})
message(FATAL_ERROR "Failed to create test directory: ${TEST_DIR}") message(FATAL_ERROR "Failed to create test directory: ${TEST_DIR}")
endif() endif()
file(GENERATE OUTPUT "${TEST_DIR}/run.cmake" file(GENERATE OUTPUT "${TEST_DIR}/run.cmake"
CONTENT " CONTENT "
# Remove previous core dump # Remove previous core dump
...@@ -90,22 +94,27 @@ function(add_test_command NAME EXE) ...@@ -90,22 +94,27 @@ function(add_test_command NAME EXE)
add_test(NAME ${NAME} COMMAND ${EXE} ${ARGN}) add_test(NAME ${NAME} COMMAND ${EXE} ${ARGN})
endif() endif()
endif() endif()
set_tests_properties(${NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED") set_tests_properties(${NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
endfunction() endfunction()
function(add_test_executable TEST_NAME) function(add_test_executable TEST_NAME)
add_executable (${TEST_NAME} EXCLUDE_FROM_ALL ${ARGN}) add_executable(${TEST_NAME} EXCLUDE_FROM_ALL ${ARGN})
target_link_libraries(${TEST_NAME} ${CMAKE_THREAD_LIBS_INIT}) target_link_libraries(${TEST_NAME} ${CMAKE_THREAD_LIBS_INIT})
# Cmake does not add flags correctly for gcc # Cmake does not add flags correctly for gcc
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU") if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
set_target_properties(${TEST_NAME} PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread) set_target_properties(${TEST_NAME} PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread)
endif() endif()
separate_arguments(MIOPEN_TEST_FLAGS_ARGS UNIX_COMMAND ${MIOPEN_TEST_FLAGS}) separate_arguments(MIOPEN_TEST_FLAGS_ARGS UNIX_COMMAND ${MIOPEN_TEST_FLAGS})
if(MIOPEN_TEST_ALL) if(MIOPEN_TEST_ALL)
set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} --all ${MIOPEN_TEST_FLAGS_ARGS}) set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} --all ${MIOPEN_TEST_FLAGS_ARGS})
else() else()
set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} ${MIOPEN_TEST_FLAGS_ARGS}) set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} ${MIOPEN_TEST_FLAGS_ARGS})
endif() endif()
add_test_command(${TEST_NAME} ${TEST_COMMAND}) add_test_command(${TEST_NAME} ${TEST_COMMAND})
add_dependencies(tests ${TEST_NAME}) add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME}) add_dependencies(check ${TEST_NAME})
...@@ -129,11 +138,11 @@ if(MIGRAPHX_ENABLE_GPU) ...@@ -129,11 +138,11 @@ if(MIGRAPHX_ENABLE_GPU)
get_filename_component(BASE_NAME ${TEST} NAME_WE) get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_gpu_${BASE_NAME} ${TEST}) add_test_executable(test_gpu_${BASE_NAME} ${TEST})
rocm_clang_tidy_check(test_gpu_${BASE_NAME}) rocm_clang_tidy_check(test_gpu_${BASE_NAME})
set_tests_properties(test_gpu_${BASE_NAME} PROPERTIES set_tests_properties(test_gpu_${BASE_NAME} PROPERTIES
COST 10 COST 10
RESOURCE_LOCK gpu RESOURCE_LOCK gpu
) )
target_link_libraries(test_gpu_${BASE_NAME} migraphx_gpu) target_link_libraries(test_gpu_${BASE_NAME} migraphx_gpu migraphx_kernels)
endforeach() endforeach()
endif() endif()
...@@ -145,8 +154,8 @@ if(MIGRAPHX_ENABLE_FPGA) ...@@ -145,8 +154,8 @@ if(MIGRAPHX_ENABLE_FPGA)
get_filename_component(BASE_NAME ${TEST} NAME_WE) get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_fpga_${BASE_NAME} ${TEST}) add_test_executable(test_fpga_${BASE_NAME} ${TEST})
rocm_clang_tidy_check(test_fpga_${BASE_NAME}) rocm_clang_tidy_check(test_fpga_${BASE_NAME})
set_tests_properties(test_fpga_${BASE_NAME} PROPERTIES set_tests_properties(test_fpga_${BASE_NAME} PROPERTIES
COST 10 COST 10
RESOURCE_LOCK fpga RESOURCE_LOCK fpga
) )
target_link_libraries(test_fpga_${BASE_NAME} migraphx_fpga) target_link_libraries(test_fpga_${BASE_NAME} migraphx_fpga)
...@@ -155,7 +164,8 @@ endif() ...@@ -155,7 +164,8 @@ endif()
# Onnx test # Onnx test
set(TEST_ONNX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/onnx) set(TEST_ONNX_DIR ${CMAKE_CURRENT_SOURCE_DIR}/onnx)
file (GLOB ONNX_TESTS ${TEST_ONNX_DIR}/*.cpp) file(GLOB ONNX_TESTS ${TEST_ONNX_DIR}/*.cpp)
foreach(ONNX_TEST ${ONNX_TESTS}) foreach(ONNX_TEST ${ONNX_TESTS})
get_filename_component(BASE_NAME ${ONNX_TEST} NAME_WE) get_filename_component(BASE_NAME ${ONNX_TEST} NAME_WE)
set(TEST_NAME test_${BASE_NAME}) set(TEST_NAME test_${BASE_NAME})
...@@ -163,7 +173,7 @@ foreach(ONNX_TEST ${ONNX_TESTS}) ...@@ -163,7 +173,7 @@ foreach(ONNX_TEST ${ONNX_TESTS})
rocm_clang_tidy_check(${TEST_NAME}) rocm_clang_tidy_check(${TEST_NAME})
target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_ref) target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_ref)
target_include_directories(${TEST_NAME} PUBLIC include) target_include_directories(${TEST_NAME} PUBLIC include)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_ONNX_DIR}) add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_ONNX_DIR})
add_dependencies(tests ${TEST_NAME}) add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME}) add_dependencies(check ${TEST_NAME})
endforeach() endforeach()
...@@ -174,26 +184,26 @@ add_executable(test_tf tf/tf_test.cpp) ...@@ -174,26 +184,26 @@ add_executable(test_tf tf/tf_test.cpp)
rocm_clang_tidy_check(test_tf) rocm_clang_tidy_check(test_tf)
target_link_libraries(test_tf migraphx_tf migraphx_ref) target_link_libraries(test_tf migraphx_tf migraphx_ref)
target_include_directories(test_tf PUBLIC include) target_include_directories(test_tf PUBLIC include)
add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${TEST_TF_DIR}) add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${TEST_TF_DIR})
add_dependencies(tests test_tf) add_dependencies(tests test_tf)
add_dependencies(check test_tf) add_dependencies(check test_tf)
add_subdirectory(api) add_subdirectory(api)
add_subdirectory(verify) add_subdirectory(verify)
if(MIGRAPHX_ENABLE_PYTHON) if(MIGRAPHX_ENABLE_PYTHON)
add_subdirectory(py) add_subdirectory(py)
endif() endif()
function(test_header NAME HEADER) function(test_header NAME HEADER)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
"#include <${HEADER}>\nint main() {}\n" "#include <${HEADER}>\nint main() {}\n"
) )
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp
"#include <${HEADER}>\n" "#include <${HEADER}>\n"
) )
add_test_executable(${NAME} add_test_executable(${NAME}
${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp ${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp
) )
endfunction() endfunction()
...@@ -206,6 +216,7 @@ function(test_headers PREFIX) ...@@ -206,6 +216,7 @@ function(test_headers PREFIX)
string(MAKE_C_IDENTIFIER ${HEADER_REL} TEST_NAME) string(MAKE_C_IDENTIFIER ${HEADER_REL} TEST_NAME)
get_filename_component(BASE_NAME ${HEADER} NAME_WE) get_filename_component(BASE_NAME ${HEADER} NAME_WE)
test_header(header_${TEST_NAME} ${PREFIX}/${BASE_NAME}.hpp) test_header(header_${TEST_NAME} ${PREFIX}/${BASE_NAME}.hpp)
if(MIGRAPHX_ENABLE_GPU) if(MIGRAPHX_ENABLE_GPU)
target_link_libraries(header_${TEST_NAME} migraphx_gpu) target_link_libraries(header_${TEST_NAME} migraphx_gpu)
endif() endif()
...@@ -214,6 +225,7 @@ endfunction() ...@@ -214,6 +225,7 @@ endfunction()
test_headers(migraphx ${CMAKE_SOURCE_DIR}/src/include/migraphx/*.hpp) test_headers(migraphx ${CMAKE_SOURCE_DIR}/src/include/migraphx/*.hpp)
test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/ref/*.hpp) test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/ref/*.hpp)
if(MIGRAPHX_ENABLE_GPU) if(MIGRAPHX_ENABLE_GPU)
test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp) test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
endif() endif()
...@@ -30,7 +30,6 @@ TEST_CASE(load_save_default) ...@@ -30,7 +30,6 @@ TEST_CASE(load_save_default)
std::string filename = "migraphx_api_load_save.mxr"; std::string filename = "migraphx_api_load_save.mxr";
auto p1 = migraphx::parse_onnx("conv_relu_maxpool_test.onnx"); auto p1 = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
auto s1 = p1.get_output_shapes(); auto s1 = p1.get_output_shapes();
migraphx::save(p1, filename.c_str()); migraphx::save(p1, filename.c_str());
auto p2 = migraphx::load(filename.c_str()); auto p2 = migraphx::load(filename.c_str());
auto s2 = p2.get_output_shapes(); auto s2 = p2.get_output_shapes();
......
...@@ -35,13 +35,14 @@ ...@@ -35,13 +35,14 @@
#include <migraphx/gpu/compile_hip.hpp> #include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/gpu/compile_hip_code_object.hpp> #include <migraphx/gpu/compile_hip_code_object.hpp>
#include <migraphx/gpu/compiler.hpp> #include <migraphx/gpu/compiler.hpp>
#include <migraphx_kernels.hpp>
// NOLINTNEXTLINE // NOLINTNEXTLINE
const std::string write_2s = R"__migraphx__( const std::string write_2s = R"__migraphx__(
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
extern "C" { extern "C" {
__global__ void write(int8_t* data) __global__ void write(char* data)
{ {
int num = threadIdx.x + blockDim.x * blockIdx.x; int num = threadIdx.x + blockDim.x * blockIdx.x;
data[num] = 2; data[num] = 2;
...@@ -58,7 +59,7 @@ const std::string add_2s_binary = R"__migraphx__( ...@@ -58,7 +59,7 @@ const std::string add_2s_binary = R"__migraphx__(
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
extern "C" { extern "C" {
__global__ void add_2(std::int8_t* x, std::int8_t* y) __global__ void add_2(char* x, char* y)
{ {
int num = threadIdx.x + blockDim.x * blockIdx.x; int num = threadIdx.x + blockDim.x * blockIdx.x;
y[num] = x[num] + 2; y[num] = x[num] + 2;
...@@ -137,7 +138,8 @@ int main() {} ...@@ -137,7 +138,8 @@ int main() {}
const std::string math_template = R"__migraphx__( const std::string math_template = R"__migraphx__(
#include <migraphx/kernels/pointwise.hpp> #include <migraphx/kernels/pointwise.hpp>
#include <migraphx/kernels/math.hpp> #include <migraphx/kernels/math.hpp>
#include <migraphx/kernels/types.hpp>
using namespace migraphx;
extern "C" { extern "C" {
__global__ void kernel(${type}* p) __global__ void kernel(${type}* p)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment