"docs/en_US/BuiltinTuner.md" did not exist on "e7c519c04742813f354f42edf0e67ed7e2709fdf"
Commit c6ec6638 authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into auto_contig_fix

parents b42c7b41 a6d1540f
...@@ -200,7 +200,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option ...@@ -200,7 +200,7 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
options.params += " " + join_strings(compiler_warnings(), " "); options.params += " " + join_strings(compiler_warnings(), " ");
options.params += " -ftemplate-backtrace-limit=0"; options.params += " -ftemplate-backtrace-limit=0";
options.params += " -Werror"; options.params += " -Werror";
auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name()); auto cos = compile_hip_src(srcs, options.params, get_device_name());
if(cos.size() != 1) if(cos.size() != 1)
MIGRAPHX_THROW("No code object"); MIGRAPHX_THROW("No code object");
return code_object_op{value::binary{cos.front()}, return code_object_op{value::binary{cos.front()},
......
...@@ -43,24 +43,32 @@ template <index_int N, ...@@ -43,24 +43,32 @@ template <index_int N,
__device__ void block_scan(index idx, Op op, T init, ForStride fs, Input input, Output output) __device__ void block_scan(index idx, Op op, T init, ForStride fs, Input input, Output output)
{ {
using type = decltype(input(deduce_for_stride(fs))); using type = decltype(input(deduce_for_stride(fs)));
MIGRAPHX_DEVICE_SHARED type buffer[N]; MIGRAPHX_DEVICE_SHARED type buffer[2][N];
type x = init; type x = init;
fs([&](auto i) { fs([&](auto i) {
index_int iout = 0;
index_int iin = 1;
if(idx.local == 0) if(idx.local == 0)
buffer[idx.local] = op(input(i), x); buffer[iout][idx.local] = op(input(i), x);
else else
buffer[idx.local] = input(i); buffer[iout][idx.local] = input(i);
__syncthreads(); __syncthreads();
for(index_int s = 1; s < idx.nlocal(); s *= 2) for(index_int s = 1; s < idx.nlocal(); s *= 2)
{ {
if(idx.local + s < idx.nlocal()) iout = 1 - iout;
iin = 1 - iin;
if(idx.local >= s)
{ {
buffer[idx.local + s] = op(buffer[idx.local], buffer[idx.local + s]); buffer[iout][idx.local] = op(buffer[iin][idx.local], buffer[iin][idx.local - s]);
}
else
{
buffer[iout][idx.local] = buffer[iin][idx.local];
} }
__syncthreads(); __syncthreads();
} }
x = buffer[idx.nlocal() - 1]; x = buffer[iout][idx.nlocal() - 1];
output(i, buffer[idx.local]); output(i, buffer[iout][idx.local]);
}); });
} }
......
...@@ -146,7 +146,7 @@ __device__ __host__ T to_hip_type(T x) ...@@ -146,7 +146,7 @@ __device__ __host__ T to_hip_type(T x)
// Hip doens't support __fp16 // Hip doens't support __fp16
inline __device__ __host__ float to_hip_type(gpu_half x) { return x; } inline __device__ __host__ float to_hip_type(gpu_half x) { return x; }
#define MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(trait, T) \ #define MIGRAPHX_DEVICE_DETAIL_EXTEND_TRAIT_FOR(trait, T) \
template <class X> \ template <class X> \
struct trait : std::trait<X> \ struct trait : std::trait<X> \
{ \ { \
...@@ -157,9 +157,9 @@ inline __device__ __host__ float to_hip_type(gpu_half x) { return x; } ...@@ -157,9 +157,9 @@ inline __device__ __host__ float to_hip_type(gpu_half x) { return x; }
{ \ { \
}; };
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, __fp16) MIGRAPHX_DEVICE_DETAIL_EXTEND_TRAIT_FOR(is_floating_point, __fp16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_signed, __fp16) MIGRAPHX_DEVICE_DETAIL_EXTEND_TRAIT_FOR(is_signed, __fp16)
MIGRAPHX_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, __fp16) MIGRAPHX_DEVICE_DETAIL_EXTEND_TRAIT_FOR(is_arithmetic, __fp16)
} // namespace device } // namespace device
} // namespace gpu } // namespace gpu
......
This diff is collapsed.
...@@ -46,6 +46,7 @@ rocblas_datatype get_type(shape::type_t type) ...@@ -46,6 +46,7 @@ rocblas_datatype get_type(shape::type_t type)
case shape::uint8_type: return rocblas_datatype_u8_r; case shape::uint8_type: return rocblas_datatype_u8_r;
case shape::int32_type: return rocblas_datatype_i32_r; case shape::int32_type: return rocblas_datatype_i32_r;
case shape::uint32_type: return rocblas_datatype_u32_r; case shape::uint32_type: return rocblas_datatype_u32_r;
case shape::fp8e4m3fnuz_type:
case shape::tuple_type: case shape::tuple_type:
case shape::bool_type: case shape::bool_type:
case shape::uint16_type: case shape::uint16_type:
......
...@@ -58,10 +58,10 @@ struct hiprtc_src_file ...@@ -58,10 +58,10 @@ struct hiprtc_src_file
MIGRAPHX_GPU_EXPORT bool hip_has_flags(const std::vector<std::string>& flags); MIGRAPHX_GPU_EXPORT bool hip_has_flags(const std::vector<std::string>& flags);
MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src_with_hiprtc( MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src_with_hiprtc(
std::vector<hiprtc_src_file> srcs, std::string params, const std::string& arch); std::vector<hiprtc_src_file> srcs, const std::string& params, const std::string& arch);
MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> MIGRAPHX_GPU_EXPORT std::vector<std::vector<char>> compile_hip_src(
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch); const std::vector<src_file>& srcs, const std::string& params, const std::string& arch);
MIGRAPHX_GPU_EXPORT std::string enum_params(std::size_t count, std::string param); MIGRAPHX_GPU_EXPORT std::string enum_params(std::size_t count, std::string param);
......
...@@ -211,6 +211,12 @@ inline pooling_descriptor make_pooling(const migraphx::op::pooling& op) ...@@ -211,6 +211,12 @@ inline pooling_descriptor make_pooling(const migraphx::op::pooling& op)
ss << op.mode; ss << op.mode;
MIGRAPHX_THROW(ss.str()); MIGRAPHX_THROW(ss.str());
} }
if(not std::all_of(
op.dilations.cbegin(), op.dilations.cend(), [](std::size_t d) { return d == 1; }))
{
MIGRAPHX_THROW("Unsupported dilations for pooling: [" + to_string_range(op.dilations) +
"]");
}
auto p = make_obj<pooling_descriptor>(&miopenCreatePoolingDescriptor); auto p = make_obj<pooling_descriptor>(&miopenCreatePoolingDescriptor);
int kdims = op.kdims(); int kdims = op.kdims();
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -33,8 +33,9 @@ rocm_set_soversion(migraphx_ref ${MIGRAPHX_SO_VERSION}) ...@@ -33,8 +33,9 @@ rocm_set_soversion(migraphx_ref ${MIGRAPHX_SO_VERSION})
find_path(BLAZE_INCLUDE blaze/Blaze.h) find_path(BLAZE_INCLUDE blaze/Blaze.h)
rocm_clang_tidy_check(migraphx_ref) rocm_clang_tidy_check(migraphx_ref)
target_link_libraries(migraphx_ref PRIVATE Threads::Threads)
target_link_libraries(migraphx_ref PUBLIC migraphx) target_link_libraries(migraphx_ref PUBLIC migraphx)
target_include_directories(migraphx_ref PRIVATE ${BLAZE_INCLUDE}) target_include_directories(migraphx_ref SYSTEM PRIVATE ${BLAZE_INCLUDE})
target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS) target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS)
migraphx_generate_export_header(migraphx_ref) migraphx_generate_export_header(migraphx_ref)
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment