Unverified Commit a33d6fa2 authored by Paul Fultz II's avatar Paul Fultz II Committed by GitHub
Browse files

Check jit kernels files with clang-tidy (#1012)

* Check jit kernels files with clang-tidy
parent 4f9a0ce7
...@@ -91,28 +91,34 @@ add_library(migraphx_device ...@@ -91,28 +91,34 @@ add_library(migraphx_device
device/unary_not.cpp device/unary_not.cpp
device/where.cpp device/where.cpp
) )
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device) add_library(compile_for_gpu INTERFACE)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION}) target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
rocm_clang_tidy_check(migraphx_device) target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
target_compile_options(migraphx_device PRIVATE -std=c++17 -fno-gpu-rdc -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(migraphx_device migraphx hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
set(AMDGPU_TARGETS "gfx803;gfx900;gfx906" CACHE STRING "")
foreach(AMDGPU_TARGET ${AMDGPU_TARGETS})
target_compile_options(migraphx_device PRIVATE -amdgpu-target=${AMDGPU_TARGET})
target_link_libraries(migraphx_device -amdgpu-target=${AMDGPU_TARGET})
endforeach()
else()
target_compile_options(migraphx_device PRIVATE -Wno-cuda-compat)
endif()
check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE) check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
if(HAS_HIP_LAMBDA_HOST_DEVICE) if(HAS_HIP_LAMBDA_HOST_DEVICE)
message(STATUS "Enable -fhip-lambda-host-device") message(STATUS "Enable -fhip-lambda-host-device")
target_compile_options(migraphx_device PRIVATE -fhip-lambda-host-device) target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device)
endif() endif()
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device PUBLIC migraphx)
target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>) target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(kernel_file_check EXCLUDE_FROM_ALL)
foreach(KERNEL_FILE ${KERNEL_FILES})
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
endforeach()
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
target_link_libraries(kernel_file_check compile_for_gpu)
rocm_clang_tidy_check(kernel_file_check)
add_library(migraphx_gpu add_library(migraphx_gpu
abs.cpp abs.cpp
analyze_streams.cpp analyze_streams.cpp
...@@ -341,7 +347,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels) ...@@ -341,7 +347,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver) add_subdirectory(driver)
rocm_install_targets( rocm_install_targets(
TARGETS migraphx_gpu migraphx_device TARGETS migraphx_gpu migraphx_device compile_for_gpu
INCLUDE INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include ${CMAKE_CURRENT_SOURCE_DIR}/include
) )
......
...@@ -14,17 +14,29 @@ namespace gpu { ...@@ -14,17 +14,29 @@ namespace gpu {
static const char* const roialign_kernel = R"__migraphx__( static const char* const roialign_kernel = R"__migraphx__(
#include <migraphx/kernels/roialign.hpp> #include <migraphx/kernels/roialign.hpp>
#include <migraphx/kernels/basic_ops.hpp> #include <migraphx/kernels/basic_ops.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/generic_constant.hpp>
#include <args.hpp> #include <args.hpp>
using namespace migraphx; namespace migraphx {
extern "C" { extern "C" {
__global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y) __global__ void roialign_kernel(void* in_x, void* in_rois, void* in_ind, void* y)
{ {
make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) { roialign(xs...); }); make_tensors()(in_x, in_rois, in_ind, y)([](auto&&... xs) {
auto settings = make_roalign_settings(MIGRAPHX_MAKE_CONSTANT(float{ROIS_OFFSET}),
_c<bool{IS_AVG_POOLING}>,
_c<int64_t{SAMPLING_RATIO}>,
MIGRAPHX_MAKE_CONSTANT(float{SPATIAL_SCALE}));
roialign(xs..., settings);
});
} }
} }
} // namespace migraphx
int main() {} int main() {}
)__migraphx__"; )__migraphx__";
......
...@@ -176,23 +176,23 @@ struct array ...@@ -176,23 +176,23 @@ struct array
} }
}; };
template <class T, T... xs> template <class T, T... Xs>
struct integral_const_array : array<T, sizeof...(xs)> struct integral_const_array : array<T, sizeof...(Xs)>
{ {
using base_array = array<T, sizeof...(xs)>; using base_array = array<T, sizeof...(Xs)>;
MIGRAPHX_DEVICE_CONSTEXPR integral_const_array() : base_array({xs...}) {} MIGRAPHX_DEVICE_CONSTEXPR integral_const_array() : base_array({Xs...}) {}
}; };
template <class T, T... xs, class F> template <class T, T... Xs, class F>
constexpr auto transform(integral_const_array<T, xs...>, F f) constexpr auto transform(integral_const_array<T, Xs...>, F f)
{ {
return integral_const_array<T, f(xs)...>{}; return integral_const_array<T, f(Xs)...>{};
} }
template <class T, T... xs, class U, U... ys, class F> template <class T, T... Xs, class U, U... Ys, class F>
constexpr auto transform(integral_const_array<T, xs...>, integral_const_array<U, ys...>, F f) constexpr auto transform(integral_const_array<T, Xs...>, integral_const_array<U, Ys...>, F f)
{ {
return integral_const_array<T, f(xs, ys)...>{}; return integral_const_array<T, f(Xs, Ys)...>{};
} }
template <index_int... Ns> template <index_int... Ns>
......
#ifndef MIGRAPHX_GUARD_KERNELS_DEBUG_HPP #ifndef MIGRAPHX_GUARD_KERNELS_DEBUG_HPP
#define MIGRAPHX_GUARD_KERNELS_DEBUG_HPP #define MIGRAPHX_GUARD_KERNELS_DEBUG_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
namespace migraphx { namespace migraphx {
inline __host__ __device__ void // Workaround hip's broken abort on device code
#ifdef __HIP_DEVICE_COMPILE__
// NOLINTNEXTLINE
#define MIGRAPHX_HIP_NORETURN
#else
// NOLINTNEXTLINE
#define MIGRAPHX_HIP_NORETURN [[noreturn]]
#endif
// noreturn cannot be used on this function because abort in hip is broken
MIGRAPHX_HIP_NORETURN inline __host__ __device__ void
assert_fail(const char* assertion, const char* file, unsigned int line, const char* function) assert_fail(const char* assertion, const char* file, unsigned int line, const char* function)
{ {
printf("%s:%u: %s: assertion '%s' failed.\n", file, line, function, assertion); printf("%s:%u: %s: assertion '%s' failed.\n", file, line, function, assertion);
......
...@@ -168,6 +168,7 @@ constexpr auto transform_args(F f, Fs... fs) ...@@ -168,6 +168,7 @@ constexpr auto transform_args(F f, Fs... fs)
return [=](auto... xs) { return transform_args(f)(xs...)(transform_args(fs...)); }; return [=](auto... xs) { return transform_args(f)(xs...)(transform_args(fs...)); };
} }
// NOLINTNEXTLINE
#define MIGRAPHX_LIFT(...) \ #define MIGRAPHX_LIFT(...) \
([](auto&&... xs) { return (__VA_ARGS__)(static_cast<decltype(xs)>(xs)...); }) ([](auto&&... xs) { return (__VA_ARGS__)(static_cast<decltype(xs)>(xs)...); })
......
#ifndef MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
#define MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
namespace migraphx {
template <class F>
struct generic_constant
{
static constexpr auto value = F{}();
using value_type = decltype(value);
using type = generic_constant;
constexpr operator value_type() const noexcept { return value; }
constexpr value_type operator()() const noexcept { return value; }
};
template <class F>
constexpr generic_constant<F> make_generic_constant(F)
{
return {};
}
// NOLINTNEXTLINE
#define MIGRAPHX_MAKE_CONSTANT(x) \
make_generic_constant([] { \
struct fun \
{ \
constexpr auto operator()() const { return x; } \
}; \
return fun{}; \
}())
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_GENERIC_CONSTANT_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_HIP_HPP
#define MIGRAPHX_GUARD_KERNELS_HIP_HPP
// Workaround macro redefinition issue with clang tidy
#if defined(__HIP_PLATFORM_HCC__) && defined(MIGRAPHX_USE_CLANG_TIDY)
#undef __HIP_PLATFORM_HCC__ // NOLINT
#endif
#include <hip/hip_runtime.h>
#endif // MIGRAPHX_GUARD_KERNELS_HIP_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_INDEX_HPP #ifndef MIGRAPHX_GUARD_KERNELS_INDEX_HPP
#define MIGRAPHX_GUARD_KERNELS_INDEX_HPP #define MIGRAPHX_GUARD_KERNELS_INDEX_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/types.hpp> #include <migraphx/kernels/types.hpp>
namespace migraphx { namespace migraphx {
...@@ -17,7 +17,7 @@ struct index ...@@ -17,7 +17,7 @@ struct index
#ifdef MIGRAPHX_NGLOBAL #ifdef MIGRAPHX_NGLOBAL
return MIGRAPHX_NGLOBAL; return MIGRAPHX_NGLOBAL;
#else #else
return blockDim.x * gridDim.x; return blockDim.x * gridDim.x; // NOLINT
#endif #endif
} }
...@@ -26,7 +26,7 @@ struct index ...@@ -26,7 +26,7 @@ struct index
#ifdef MIGRAPHX_NLOCAL #ifdef MIGRAPHX_NLOCAL
return MIGRAPHX_NLOCAL; return MIGRAPHX_NLOCAL;
#else #else
return blockDim.x; return blockDim.x; // NOLINT
#endif #endif
} }
...@@ -53,7 +53,7 @@ struct index ...@@ -53,7 +53,7 @@ struct index
inline __device__ index make_index() inline __device__ index make_index()
{ {
return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
} }
} // namespace migraphx } // namespace migraphx
......
...@@ -5,28 +5,30 @@ ...@@ -5,28 +5,30 @@
namespace migraphx { namespace migraphx {
template <class T, T v> template <class T, T V>
struct integral_constant struct integral_constant
{ {
static constexpr T value = v; static constexpr T value = V;
using value_type = T; using value_type = T;
using type = integral_constant; using type = integral_constant;
constexpr operator value_type() const noexcept { return value; } constexpr operator value_type() const noexcept { return value; }
constexpr value_type operator()() const noexcept { return value; } constexpr value_type operator()() const noexcept { return value; }
}; };
// NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \ #define MIGRAPHX_INTEGRAL_CONSTANT_BINARY_OP(op) \
template <class T, T v, class U, U w> \ template <class T, T V, class U, U w> \
constexpr inline integral_constant<decltype(v op w), (v op w)> operator op( \ constexpr inline integral_constant<decltype(V op w), (V op w)> operator op( \
integral_constant<T, v>, integral_constant<U, w>) noexcept \ integral_constant<T, V>, integral_constant<U, w>) noexcept \
{ \ { \
return {}; \ return {}; \
} }
// NOLINTNEXTLINE
#define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \ #define MIGRAPHX_INTEGRAL_CONSTANT_UNARY_OP(op) \
template <class T, T v> \ template <class T, T V> \
constexpr inline integral_constant<decltype(op v), (op v)> operator op( \ constexpr inline integral_constant<decltype(op V), (op V)> operator op( \
integral_constant<T, v>) noexcept \ integral_constant<T, V>) noexcept \
{ \ { \
return {}; \ return {}; \
} }
...@@ -64,8 +66,8 @@ using false_type = bool_constant<false>; ...@@ -64,8 +66,8 @@ using false_type = bool_constant<false>;
template <index_int N> template <index_int N>
using index_constant = integral_constant<index_int, N>; using index_constant = integral_constant<index_int, N>;
template <auto v> template <auto V>
static constexpr auto _c = integral_constant<decltype(v), v>{}; static constexpr auto _c = integral_constant<decltype(V), V>{}; // NOLINT
} // namespace migraphx } // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP #endif // MIGRAPHX_GUARD_KERNELS_INTEGRAL_CONSTANT_HPP
...@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss) ...@@ -14,9 +14,7 @@ constexpr auto traverse_preload(Shapes... ss)
auto each = [&](auto x) { auto each = [&](auto x) {
constexpr auto s = decltype(x.get_shape()){}; constexpr auto s = decltype(x.get_shape()){};
constexpr auto size = _c<s.element_space()>; constexpr auto size = _c<s.element_space()>;
if constexpr(not s.broadcasted()) if constexpr(not s.broadcasted() or (s.elements() - size) < 64)
return f(x, offset, false_type{});
else if constexpr((s.elements() - size) < 64)
return f(x, offset, false_type{}); return f(x, offset, false_type{});
else else
{ {
......
#ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP #ifndef MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#define MIGRAPHX_GUARD_KERNELS_PRINT_HPP #define MIGRAPHX_GUARD_KERNELS_PRINT_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/functional.hpp> #include <migraphx/kernels/functional.hpp>
#include <migraphx/kernels/algorithm.hpp> #include <migraphx/kernels/algorithm.hpp>
......
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
#include <migraphx/kernels/index.hpp> #include <migraphx/kernels/index.hpp>
#include <migraphx/kernels/dfor.hpp> #include <migraphx/kernels/dfor.hpp>
#include <migraphx/kernels/basic_ops.hpp> #include <migraphx/kernels/basic_ops.hpp>
#include <args.hpp> #include <migraphx/kernels/array.hpp>
namespace migraphx { namespace migraphx {
...@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data, ...@@ -104,14 +104,24 @@ MIGRAPHX_DEVICE_CONSTEXPR T calc_pooling(const T*& data,
return op.final(output_val, count); return op.final(output_val, count);
} }
template <class T, class U, class V, class W> template <class T1, class T2, class T3, class T4>
__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t) struct roalign_settings
{ {
const float roi_offset = ROIS_OFFSET; T1 roi_offset{};
const bool is_avg_pooling = IS_AVG_POOLING; T2 is_avg_pooling{};
const int64_t sampling_ratio = SAMPLING_RATIO; T3 sampling_ratio{};
const float spatial_scale = SPATIAL_SCALE; T4 spatial_scale{};
};
template <class... Ts>
constexpr roalign_settings<Ts...> make_roalign_settings(Ts... xs)
{
return {xs...};
}
template <class T, class U, class V, class W, class Settings>
__device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& y_t, Settings s)
{
auto index = make_index(); auto index = make_index();
const auto* x = x_t.data(); const auto* x = x_t.data();
const auto* rois = rois_t.data(); const auto* rois = rois_t.data();
...@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -146,9 +156,10 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
const auto* offset_rois = rois + (n * roi_column_num); const auto* offset_rois = rois + (n * roi_column_num);
const int batch_ind = ind[n]; const int batch_ind = ind[n];
array<float, 2> roi_starts = {offset_rois[1] * spatial_scale, array<float, 2> roi_starts = {offset_rois[1] * s.spatial_scale,
offset_rois[0] * spatial_scale}; offset_rois[0] * s.spatial_scale};
array<float, 2> roi_ends = {offset_rois[3] * spatial_scale, offset_rois[2] * spatial_scale}; array<float, 2> roi_ends = {offset_rois[3] * s.spatial_scale,
offset_rois[2] * s.spatial_scale};
array<float, 2> roi_size{}; array<float, 2> roi_size{};
array<float, 2> bin_size{}; array<float, 2> bin_size{};
...@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -161,11 +172,11 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
bin_size[ii] = roi_size[ii] / out_dims[ii]; bin_size[ii] = roi_size[ii] / out_dims[ii];
bin_grid_size[ii] = bin_grid_size[ii] =
(sampling_ratio > 0) ? sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]); (s.sampling_ratio > 0) ? s.sampling_ratio : std::ceil(roi_size[ii] / out_dims[ii]);
} }
const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); const auto* offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
if constexpr(is_avg_pooling) if constexpr(s.is_avg_pooling)
{ {
out_ptr[i] = calc_pooling(offset_x, out_ptr[i] = calc_pooling(offset_x,
roi_starts, roi_starts,
...@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -173,7 +184,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
roi_offset, s.roi_offset,
avg_pool{}); avg_pool{});
} }
else else
...@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W& ...@@ -184,7 +195,7 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, const W&
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
roi_offset, s.roi_offset,
max_pool{}); max_pool{});
} }
} }
......
#ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP #ifndef MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP #define MIGRAPHX_GUARD_AMDMIGRAPHX_KERNELS_TYPES_HPP
#include <hip/hip_runtime.h> #include <migraphx/kernels/hip.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -13,7 +13,7 @@ constexpr auto vec_size(vec<T, N>) ...@@ -13,7 +13,7 @@ constexpr auto vec_size(vec<T, N>)
} }
template <class T> template <class T>
constexpr auto vec_size(T, ...) constexpr auto vec_size(T, ...) // NOLINT
{ {
return index_constant<0>{}; return index_constant<0>{};
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment