Commit c6560f37 authored by Paul

Merge from master

parents 218e20fc 3124c7f7
@@ -47,7 +47,7 @@ add_compile_options(-std=c++14)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
include(EnableCompilerWarnings)
include(ROCMClangTidy)
-if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
+if(CMAKE_CXX_COMPILER MATCHES ".*hcc" OR CMAKE_CXX_COMPILER MATCHES ".*clang\\+\\+")
set(MIGRAPHX_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)
# Enable tidy on hip
elseif(MIGRAPHX_ENABLE_GPU)
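With this change, the tidy configuration that used to apply only under hcc also applies to the hip-clang driver (clang++): all checks are treated as errors except readability-inconsistent-declaration-parameter-name.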
@@ -72,7 +72,7 @@ add_library(migraphx_device
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
-target_compile_options(migraphx_device PRIVATE -std=c++17 -fno-gpu-rdc -Wno-unused-command-line-argument)
+target_compile_options(migraphx_device PRIVATE -std=c++17 -fno-gpu-rdc -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(migraphx_device migraphx hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
set(AMDGPU_TARGETS "gfx803;gfx900;gfx906" CACHE STRING "")
@@ -81,6 +81,11 @@ if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
target_link_libraries(migraphx_device -amdgpu-target=${AMDGPU_TARGET})
endforeach()
endif()
+check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
+if(HAS_HIP_LAMBDA_HOST_DEVICE)
+message(STATUS "Enable -fhip-lambda-host-device")
+target_compile_options(migraphx_device PRIVATE -fhip-lambda-host-device)
+endif()
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
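Both additions here accommodate hip-clang: -Xclang -fallow-half-arguments-and-returns allows device code to pass and return 16-bit floating-point values by value, and -fhip-lambda-host-device, probed with check_cxx_compiler_flag first so older compilers still configure, lets plain lambdas be used from host and device alike. A small illustration (not from the library) of the kind of code these flags are meant to accept:

#include <hip/hip_runtime.h>

// With -fallow-half-arguments-and-returns, hip-clang accepts
// half-precision values passed and returned by value:
__device__ __fp16 scale(__fp16 x, __fp16 a) { return x * a; }

// With -fhip-lambda-host-device, a plain lambda such as this can be
// called from device code without an explicit __device__ annotation:
inline auto twice = [](int i) { return 2 * i; };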
@@ -31,7 +31,7 @@ struct multi_index
};
template <class ForStride>
-auto deduce_for_stride(ForStride fs) -> decltype(fs(id{}));
+__device__ __host__ auto deduce_for_stride(ForStride fs) -> decltype(fs(id{}));
MIGRAPHX_DEVICE_CONSTEXPR multi_index<1> make_multi_index(index_int i, index_int n)
{
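deduce_for_stride is declaration-only: its trailing return type exists so that decltype can name the result of invoking the callable, and the added __device__ __host__ qualifiers make that deduction valid from both host and device code under hip-clang. A minimal sketch of the same idiom, with simplified names (deduce_result, an int standing in for id) that are not the library's:

#include <hip/hip_runtime.h>
#include <utility>

// Declaration-only helper: never defined, only used inside decltype()
// to compute the result type of calling F with an index.
template <class F>
__device__ __host__ auto deduce_result(F f) -> decltype(f(0));

// The result type of invoking F, computed without ever calling F.
template <class F>
using index_result_t = decltype(deduce_result(std::declval<F>()));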
@@ -69,7 +69,7 @@ struct min
struct lowest
{
template <class T>
-operator T() const
+__device__ __host__ operator T() const
{
return device_cast(std::numeric_limits<host_type<T>>::lowest());
}
@@ -78,7 +78,7 @@ struct lowest
struct highest
{
template <class T>
-operator T() const
+__device__ __host__ operator T() const
{
return device_cast(std::numeric_limits<host_type<T>>::max());
}
@@ -140,7 +140,11 @@ __device__ T dpp_mov(T& x)
input.data = x;
for(index_int i = 0; i < n; i++)
{
+#if defined(__HCC__)
output.reg[i] = __llvm_amdgcn_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl);
+#else
+output.reg[i] = __hip_move_dpp(input.reg[i], DppCtrl, RowMask, BankMask, BoundCtrl);
+#endif
}
return output.data;
}
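The new preprocessor guard in dpp_mov dispatches between toolchains: hcc exposes the raw LLVM intrinsic __llvm_amdgcn_move_dpp, while hip-clang provides the __hip_move_dpp wrapper with the same argument order. A minimal sketch of that dispatch in isolation, for a single 32-bit value (the template parameter names follow the diff; concrete DppCtrl values would come from the AMD DPP encoding):

#include <hip/hip_runtime.h>

// Move a 32-bit value across lanes with AMD's DPP (data parallel
// primitives) modifier. DppCtrl selects the cross-lane pattern,
// RowMask/BankMask gate which rows and banks participate, and
// BoundCtrl chooses the out-of-bounds behavior.
template <unsigned int DppCtrl,
          unsigned int RowMask  = 0xf,
          unsigned int BankMask = 0xf,
          bool BoundCtrl        = false>
__device__ int move_dpp(int x)
{
#if defined(__HCC__)
    return __llvm_amdgcn_move_dpp(x, DppCtrl, RowMask, BankMask, BoundCtrl);
#else
    return __hip_move_dpp(x, DppCtrl, RowMask, BankMask, BoundCtrl);
#endif
}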
@@ -103,19 +103,19 @@ host_type<T>* host_cast(T* x)
}
template <class T>
-device_type<T> device_cast(const T& x)
+__device__ __host__ device_type<T> device_cast(const T& x)
{
return reinterpret_cast<const device_type<T>&>(x);
}
template <class T>
-device_type<T>* device_cast(T* x)
+__device__ __host__ device_type<T>* device_cast(T* x)
{
return reinterpret_cast<device_type<T>*>(x);
}
template <class T>
-tensor_view<device_type<T>> device_cast(tensor_view<T> x)
+__device__ __host__ tensor_view<device_type<T>> device_cast(tensor_view<T> x)
{
return {x.get_shape(), reinterpret_cast<device_type<T>*>(x.data())};
}
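device_cast reinterprets a value through a host/device type mapping, and marking the overloads __device__ __host__ lets the same helpers serve both sides, which the now host/device lowest and highest conversion operators above depend on. A hypothetical sketch of the trait shape such casts assume (the library's actual device_type/host_type definitions may differ):

#include <hip/hip_runtime.h>

// Hypothetical mapping: identity for types shared by host and device;
// a specialization would pair a host-side half type with __fp16.
template <class T>
struct device_type_trait
{
    using type = T;
};

template <class T>
using device_type = typename device_type_trait<T>::type;

// The cast is a reinterpretation, not a conversion, so the mapped
// types must agree in size and layout. (Sketch name, not the library's.)
template <class T>
__device__ __host__ device_type<T> device_cast_sketch(const T& x)
{
    static_assert(sizeof(T) == sizeof(device_type<T>), "layout must match");
    return reinterpret_cast<const device_type<T>&>(x);
}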