Commit 7e297b13 authored by Paul

Merge

parents 86ea5e91 aa7ff911
@@ -213,7 +213,6 @@ template <std::size_t N, class... Xs>
bool is_vectorizable(const Xs&... xs)
{
return all_of({xs...}, [](const auto& s) {
if(s.standard() and (s.lens().back() % N) == 0)
return true;
if(s.broadcasted())
@@ -320,11 +319,10 @@ struct cpu_unary : reduce_dims_base, auto_register_op<cpu_unary<Op>>
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2);
auto s = inputs.at(0);
const auto& s = inputs.at(0);
return {s.type(), s.lens()};
}
argument
// cppcheck-suppress constParameter
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
{
argument result = get_arg(args, args.size() - 1);
@@ -358,12 +356,11 @@ struct cpu_binary : reduce_dims_base, auto_register_op<cpu_binary<Op>>
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(3);
auto s = inputs.at(0);
const auto& s = inputs.at(0);
return {s.type(), s.lens()};
}
argument
// cppcheck-suppress constParameter
compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
{
argument result = get_arg(args, args.size() - 1);
......
@@ -31,7 +31,7 @@ struct dnnl_layernorm : dnnl_op<dnnl_layernorm, dnnl::layer_normalization_forwar
get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
{
return {dnnl::prop_kind::forward_inference,
m.at(DNNL_ARG_SRC),
m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
1e-12f,
dnnl::normalization_flags::none};
}
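The MIGRAPHX_DNNL_PREFIX(ARG_SRC) wrapper replaces the bare DNNL_ARG_SRC constant so the same descriptor code can build against stock oneDNN or a ZenDNN build (see the MIGRAPHX_ENABLE_ZENDNN guard further down), where the constants presumably carry a different prefix. A minimal sketch of such a macro, with the ZENDNN_ naming assumed; the real definition lives in a header outside this diff:

// Hedged sketch only; not the actual definition from the tree.
#ifdef MIGRAPHX_ENABLE_ZENDNN
#define MIGRAPHX_DNNL_PREFIX(x) ZENDNN_##x // assumed ZenDNN symbol naming
#else
#define MIGRAPHX_DNNL_PREFIX(x) DNNL_##x
#endif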
......
@@ -12,7 +12,7 @@ struct dnnl_logsoftmax : dnnl_extend_op<dnnl_logsoftmax, dnnl::logsoftmax_forwar
get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
{
int axis = this->op.axis;
return {dnnl::prop_kind::forward_inference, m.at(DNNL_ARG_SRC_0), axis};
return {dnnl::prop_kind::forward_inference, m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), axis};
}
};
......
@@ -223,7 +223,7 @@ struct cpu_unary2 : auto_register_op<cpu_unary2<Op>>
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(1);
auto s = inputs.at(0);
const auto& s = inputs.at(0);
return {s.type(), s.lens()};
}
@@ -352,7 +352,7 @@ struct cpu_apply
std::transform(bind_inputs.begin(),
bind_inputs.end(),
std::back_inserter(inputs),
[&](const auto& s) { return r.instructions.at(s); });
[&](const auto& s) { return r.instructions[s]; });
inputs.push_back(this->insert_allocation(ins, ins->get_shape()));
modl->replace_instruction(ins, op, inputs);
});
@@ -392,8 +392,10 @@ struct cpu_apply
extend_op("concat", "dnnl::concat");
extend_op("contiguous", "dnnl::reorder");
extend_op("convolution", "dnnl::convolution");
#ifndef MIGRAPHX_ENABLE_ZENDNN
extend_op("deconvolution", "dnnl::deconvolution");
extend_op("dot", "dnnl::dot");
#endif
extend_op("erf", "cpu::erf");
extend_op("gather", "cpu::gather");
extend_op("logsoftmax", "dnnl::logsoftmax");
@@ -458,11 +460,6 @@ struct cpu_apply
if(has_op("dnnl::pooling") and ins->get_shape().type() == shape::type_t::float_type and
not v["ceil_mode"].to<bool>())
return replace(ins, make_op("dnnl::pooling", op.to_value()));
std::string mode = v["mode"].to<std::string>();
if(mode == "max")
return replace(ins, make_op("cpu::pooling_max", v));
else if(mode == "average")
return replace(ins, make_op("cpu::pooling_average", v));
return ins;
}
......
@@ -12,7 +12,7 @@ struct dnnl_lrn : dnnl_extend_op<dnnl_lrn, dnnl::lrn_forward, op::lrn>
{
return {dnnl::prop_kind::forward_inference,
dnnl::algorithm::lrn_across_channels,
m.at(DNNL_ARG_SRC_0),
m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)),
this->op.size,
this->op.alpha,
this->op.beta,
......
@@ -11,132 +11,21 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace cpu {
struct max_pool
{
static std::string name() { return "max"; }
template <class T>
static T start()
{
return std::numeric_limits<T>::lowest();
}
static double apply(double x, double y)
{
double m = std::max(x, y);
return (m);
}
static double final(double x, std::size_t) { return (x); }
};
struct avg_pool
{
static std::string name() { return "average"; }
template <class T>
static double start()
{
return 0.0;
}
static double apply(double x, double y) { return x + y; }
static double final(double x, std::size_t y) { return (y == 0) ? 0.0 : (x / y); }
};
template <class Op>
struct cpu_pooling : auto_register_op<cpu_pooling<Op>>
{
cpu_pooling() = default;
cpu_pooling(op::pooling pop) : op(std::move(pop)) {}
op::pooling op;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return migraphx::reflect(self.op, f);
}
std::string name() const { return "cpu::pooling_" + Op::name(); }
shape compute_shape(std::vector<shape> inputs) const
{
inputs.pop_back();
return op.normalize_compute_shape(inputs);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
argument compute(context&, const shape& output_shape, std::vector<argument> args) const
{
visit_all(args.back(), args[0])([&](auto output, auto input) {
using type = typename decltype(output)::value_type;
auto in_s = input.get_shape();
auto in_lens = in_s.lens();
std::vector<std::size_t> vec_len(in_lens.begin() + 2, in_lens.end());
par_for(output_shape.elements(), [&](auto i) {
auto idx_o = output_shape.multi(i);
auto n_dim = idx_o.size();
std::vector<std::size_t> win_start;
std::vector<std::size_t> win_size;
for(std::size_t dim = 2; dim < n_dim; ++dim)
{
auto d_2 = dim - 2;
int start = static_cast<int>(idx_o[dim] * op.stride[d_2]) -
static_cast<int>(op.padding[d_2]);
int end = std::min(start + op.lengths[d_2], in_lens[dim]);
start = std::max(start, 0);
win_start.push_back(start);
win_size.push_back(end - start);
}
shape win_shape{output_shape.type(), win_size};
auto pool_size = win_shape.elements();
double acc = Op::template start<type>();
shape_for_each(win_shape, [&](auto idx_w) {
auto idx = idx_o;
std::transform(idx_w.begin(),
idx_w.end(),
win_start.begin(),
idx.begin() + 2,
[](auto ii, auto jj) { return ii + jj; });
if(std::all_of(idx.begin() + 2, idx.end(), [&](auto ii) { return ii >= 0; }) and
idx < in_lens)
{
acc = Op::apply(acc, input[in_s.index(idx)]);
}
});
output[i] = type(Op::final(acc, pool_size));
});
});
return args.back();
}
};
template struct cpu_pooling<avg_pool>;
template struct cpu_pooling<max_pool>;
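For reference, the per-dimension window clamping inside cpu_pooling::compute above, extracted as a standalone sketch; the helper name and plain int types are illustrative, not part of the tree:

#include <algorithm>
#include <utility>

// Mirrors the start/end arithmetic in cpu_pooling::compute: offset by the
// stride, shift left by the padding, then clip the window to the input.
std::pair<int, int> pool_window(int out_idx, int stride, int padding, int length, int in_len)
{
    int start = out_idx * stride - padding;       // may be negative inside the padding
    int end   = std::min(start + length, in_len); // clip to the input extent
    start     = std::max(start, 0);
    return {start, end - start};                  // {win_start, win_size}
}
// pool_window(0, 2, 1, 3, 5) == {0, 2}: the left padding shrinks the
// effective window from 3 elements to 2.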
struct dnnl_pooling : dnnl_extend_op<dnnl_pooling, dnnl::pooling_forward, op::pooling>
{
std::vector<int> arg_map(int) const { return {DNNL_ARG_SRC}; }
std::vector<int> arg_map(int) const { return {MIGRAPHX_DNNL_PREFIX(ARG_SRC)}; }
dnnl::pooling_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
{
auto algo = op.mode == "max" ? dnnl::algorithm::pooling_max : dnnl::algorithm::pooling_avg;
auto algo = op.mode == op::pooling_mode::max ? dnnl::algorithm::pooling_max
: dnnl::algorithm::pooling_avg;
auto kdims = op.kdims();
std::vector<size_t> padding_l(op.padding.begin(), op.padding.begin() + kdims);
std::vector<size_t> padding_r(op.padding.begin() + kdims, op.padding.end());
return {dnnl::prop_kind::forward_inference,
algo,
m.at(DNNL_ARG_SRC),
m.at(DNNL_ARG_DST),
m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
to_dnnl_dims(op.stride),
to_dnnl_dims(op.lengths),
to_dnnl_dims(padding_l),
@@ -145,5 +34,6 @@ struct dnnl_pooling : dnnl_extend_op<dnnl_pooling, dnnl::pooling_forward, op::po
};
} // namespace cpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
File mode changed from 100755 to 100644
@@ -37,7 +37,11 @@ struct dnnl_reduction : dnnl_op<dnnl_reduction, dnnl::reduction>
dnnl::reduction::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
{
return {to_dnnl_algo(algo), m.at(DNNL_ARG_SRC), m.at(DNNL_ARG_DST), 0, 0};
return {to_dnnl_algo(algo),
m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)),
m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST)),
0,
0};
}
};
......
@@ -27,7 +27,7 @@ struct dnnl_reorder : dnnl_op<dnnl_reorder, dnnl::reorder>
};
desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
{
return {m.at(DNNL_ARG_SRC), m.at(DNNL_ARG_DST)};
return {m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC)), m.at(MIGRAPHX_DNNL_PREFIX(ARG_DST))};
}
auto get_primitive_desc(const desc& d, const dnnl::primitive_attr& attr) const
......
@@ -11,7 +11,7 @@ struct dnnl_softmax : dnnl_extend_op<dnnl_softmax, dnnl::softmax_forward, op::so
dnnl::softmax_forward::desc get_desc(const std::unordered_map<int, dnnl::memory::desc>& m) const
{
int axis = this->op.axis;
return {dnnl::prop_kind::forward_inference, m.at(DNNL_ARG_SRC_0), axis};
return {dnnl::prop_kind::forward_inference, m.at(MIGRAPHX_DNNL_PREFIX(ARG_SRC_0)), axis};
}
};
......
@@ -3,7 +3,6 @@
#include <migraphx/check_context.hpp>
#include <migraphx/adjust_allocation.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/decompose.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/eliminate_concat.hpp>
@@ -15,7 +14,6 @@
#include <migraphx/memory_coloring.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/remap.hpp>
#include <migraphx/rewrite_batchnorm.hpp>
#include <migraphx/rewrite_pooling.hpp>
#include <migraphx/rewrite_quantization.hpp>
@@ -23,6 +21,7 @@
#include <migraphx/schedule.hpp>
#include <migraphx/memory_coloring.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/cpu/fuse_ops.hpp>
@@ -52,8 +51,6 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{},
eliminate_data_type{unsupported_types, shape::type_t::float_type},
dead_code_elimination{},
decompose{},
dead_code_elimination{},
simplify_reshapes{},
eliminate_identity{},
eliminate_pad{},
......
@@ -11,7 +11,7 @@ if(NOT TARGET MIOpen)
endif()
include(Embed)
file(GLOB KERNEL_FILES
file(GLOB KERNEL_FILES ${CONFIGURE_DEPENDS}
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
add_embed_library(migraphx_kernels ${KERNEL_FILES})
@@ -41,6 +41,7 @@ add_library(migraphx_device
device/equal.cpp
device/erf.cpp
device/exp.cpp
device/fill.cpp
device/floor.cpp
device/gather.cpp
device/gelu.cpp
@@ -58,6 +59,8 @@ add_library(migraphx_device
device/mul.cpp
device/mul_add.cpp
device/mul_add_relu.cpp
device/multinomial.cpp
device/nonzero.cpp
device/pad.cpp
device/pow.cpp
device/prelu.cpp
@@ -84,30 +87,40 @@ add_library(migraphx_device
device/sub.cpp
device/tan.cpp
device/tanh.cpp
device/topk.cpp
device/unary_not.cpp
device/where.cpp
)
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_compile_options(migraphx_device PRIVATE -std=c++17 -fno-gpu-rdc -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(migraphx_device migraphx hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument)
if(CMAKE_CXX_COMPILER MATCHES ".*hcc")
set(AMDGPU_TARGETS "gfx803;gfx900;gfx906" CACHE STRING "")
foreach(AMDGPU_TARGET ${AMDGPU_TARGETS})
target_compile_options(migraphx_device PRIVATE -amdgpu-target=${AMDGPU_TARGET})
target_link_libraries(migraphx_device -amdgpu-target=${AMDGPU_TARGET})
endforeach()
else()
target_compile_options(migraphx_device PRIVATE -Wno-cuda-compat)
endif()
add_library(compile_for_gpu INTERFACE)
target_compile_options(compile_for_gpu INTERFACE -std=c++17 -fno-gpu-rdc -Wno-cuda-compat -Wno-unused-command-line-argument -Xclang -fallow-half-arguments-and-returns)
target_link_libraries(compile_for_gpu INTERFACE hip::device -fno-gpu-rdc -Wno-invalid-command-line-argument -Wno-unused-command-line-argument -Wno-option-ignored)
check_cxx_compiler_flag("--cuda-host-only -fhip-lambda-host-device -x hip" HAS_HIP_LAMBDA_HOST_DEVICE)
if(HAS_HIP_LAMBDA_HOST_DEVICE)
message(STATUS "Enable -fhip-lambda-host-device")
target_compile_options(migraphx_device PRIVATE -fhip-lambda-host-device)
target_compile_options(compile_for_gpu INTERFACE -fhip-lambda-host-device)
endif()
set_target_properties(migraphx_device PROPERTIES EXPORT_NAME device)
rocm_set_soversion(migraphx_device ${MIGRAPHX_SO_VERSION})
rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device PUBLIC migraphx)
target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
add_library(kernel_file_check EXCLUDE_FROM_ALL)
foreach(KERNEL_FILE ${KERNEL_FILES})
get_filename_component(KERNEL_BASE_FILE ${KERNEL_FILE} NAME_WE)
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp "#include <migraphx/kernels/${KERNEL_BASE_FILE}.hpp>\n")
target_sources(kernel_file_check PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/kernels/include/migraphx/kernels/${KERNEL_BASE_FILE}.cpp)
endforeach()
target_compile_definitions(kernel_file_check PRIVATE -DMIGRAPHX_NLOCAL=256)
target_include_directories(kernel_file_check PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/>)
target_link_libraries(kernel_file_check compile_for_gpu)
rocm_clang_tidy_check(kernel_file_check)
file(GLOB JIT_GPU_SRCS ${CONFIGURE_DEPENDS} ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp)
add_library(migraphx_gpu
abs.cpp
analyze_streams.cpp
@@ -117,9 +130,11 @@ add_library(migraphx_gpu
batch_norm_inference.cpp
clip.cpp
code_object_op.cpp
compile_ops.cpp
compile_gen.cpp
compile_hip.cpp
compile_hip_code_object.cpp
compile_pointwise.cpp
compiler.cpp
concat.cpp
convert.cpp
convolution.cpp
@@ -136,11 +151,15 @@ add_library(migraphx_gpu
kernel.cpp
lowering.cpp
logsoftmax.cpp
loop.cpp
lrn.cpp
leaky_relu.cpp
mlir_conv.cpp
multinomial.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
prefuse_ops.cpp
pad.cpp
pooling.cpp
quant_convolution.cpp
@@ -152,7 +171,9 @@ add_library(migraphx_gpu
softmax.cpp
sync_device.cpp
target.cpp
topk.cpp
write_literals.cpp
${JIT_GPU_SRCS}
)
set_target_properties(migraphx_gpu PROPERTIES EXPORT_NAME gpu)
@@ -190,9 +211,12 @@ register_migraphx_gpu_ops(hip_
logical_and
logical_or
logical_xor
loop
max
min
mul
multinomial
nonzero
pad
pow
prelu
@@ -218,7 +242,9 @@ register_migraphx_gpu_ops(hip_
sub
tanh
tan
topk
unary_not
where
)
register_migraphx_gpu_ops(miopen_
abs
@@ -284,19 +310,37 @@ if(MIGRAPHX_ENABLE_MLIR)
target_link_libraries(migraphx_gpu PUBLIC ${LIBMLIRMIOPEN})
endif()
set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "")
if(MIGRAPHX_USE_HIPRTC)
target_compile_definitions(migraphx_gpu PRIVATE -DMIGRAPHX_USE_HIPRTC=1)
else()
# Get flags needed to compile hip
include(TargetFlags)
target_flags(HIP_COMPILER_FLAGS hip::device)
# Remove cuda arch flags
string(REGEX REPLACE "--cuda-gpu-arch=[^ \t\r\n]+" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REGEX REPLACE "--offload-arch=[^ \t\r\n]+" "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
string(REGEX REPLACE --offload-arch=[a-z0-9:+-]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
# Skip library paths since hip will incorrectly treat it as a source file
string(APPEND HIP_COMPILER_FLAGS " ")
foreach(_unused RANGE 2)
string(REGEX REPLACE " /[^ ]+\\.(a|so) " " " HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
endforeach()
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
target_compile_definitions(migraphx_gpu PRIVATE
"-DMIGRAPHX_HIP_COMPILER=${CMAKE_CXX_COMPILER}"
"-DMIGRAPHX_HIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}"
"-DMIGRAPHX_OFFLOADBUNDLER_BIN=${MIGRAPHX_OFFLOADBUNDLER_BIN}"
"-DMIGRAPHX_EXTRACT_KERNEL=${MIGRAPHX_EXTRACT_KERNEL}"
"-DMIGRAPHX_USE_HIPRTC=0"
)
if(DEFINED CMAKE_CXX_COMPILER_LAUNCHER)
execute_process(COMMAND which ${CMAKE_CXX_COMPILER_LAUNCHER} OUTPUT_VARIABLE MIGRAPHX_HIP_COMPILER_LAUNCHER)
string(STRIP "${MIGRAPHX_HIP_COMPILER_LAUNCHER}" MIGRAPHX_HIP_COMPILER_LAUNCHER)
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_HIP_COMPILER_LAUNCHER=${MIGRAPHX_HIP_COMPILER_LAUNCHER}")
endif()
endif()
# Check miopen find mode api
include(CheckLibraryExists)
@@ -317,7 +361,7 @@ target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
add_subdirectory(driver)
rocm_install_targets(
TARGETS migraphx_gpu migraphx_device
TARGETS migraphx_gpu migraphx_device compile_for_gpu
INCLUDE
${CMAKE_CURRENT_SOURCE_DIR}/include
)
......
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/module.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......
@@ -28,30 +28,30 @@ struct hip_stream_model
bool is_wait(migraphx::instruction_ref ins) const { return ins->name() == "gpu::wait_event"; }
};
stream_model make_stream_model(const module& p)
stream_model make_stream_model(const module& m)
{
hip_stream_model m;
hip_stream_model hsm;
std::size_t stream = 0;
for(auto ins : iterator_for(p))
for(auto ins : iterator_for(m))
{
if(ins->name() == "gpu::set_stream")
{
auto v = ins->get_operator().to_value();
stream = v["stream"].to<std::size_t>();
m.max_stream = std::max(stream, m.max_stream);
auto v = ins->get_operator().to_value();
stream = v["stream"].to<std::size_t>();
hsm.max_stream = std::max(stream, hsm.max_stream);
}
if(ins->get_operator().is_context_free())
continue;
if(contains({"hip::hip_allocate_memory", "hip::hip_copy_literal", "@param"}, ins->name()))
continue;
m.ins2stream[ins] = stream;
hsm.ins2stream[ins] = stream;
}
return m;
return hsm;
}
std::vector<stream_race> analyze_streams(const module& p)
std::vector<stream_race> analyze_streams(const module& m)
{
return migraphx::analyze_streams(p, make_stream_model(p));
return migraphx::analyze_streams(m, make_stream_model(m));
}
} // namespace gpu
......
@@ -9,7 +9,7 @@ namespace gpu {
shape hip_argmax::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).standard();
check_shapes{inputs, *this}.has(2);
return op.normalize_compute_shape({inputs.at(0)});
}
......
@@ -9,7 +9,7 @@ namespace gpu {
shape hip_argmin::compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2).standard();
check_shapes{inputs, *this}.has(2);
return op.normalize_compute_shape({inputs.at(0)});
}
......
#include <migraphx/gpu/compile_gen.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/stringutils.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace gen {
static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
{
// If all inputs are half then only use half2
if(std::all_of(inputs.begin(), inputs.end(), [](const auto& s) {
return s.type() == shape::half_type;
}))
return {2};
return {4, 2};
}
vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
{
auto sizes = vector_sizes(inputs);
std::vector<std::size_t> max_vec_size;
std::transform(inputs.begin(),
inputs.end(),
std::back_inserter(max_vec_size),
[&](const auto& input) -> std::size_t {
auto stride = input.strides()[axis];
auto len = input.lens()[axis];
if(stride != 0 and stride != 1)
return 1;
if(len == 1 and input.elements() > sizes.front())
return sizes.front();
auto it = std::find_if(
sizes.begin(), sizes.end(), [&](auto i) { return (len % i) == 0; });
if(it != sizes.end())
return *it;
return 1;
});
return {*std::min_element(max_vec_size.begin(), max_vec_size.end()), axis};
}
std::string vectorize::str() const
{
return "vectorize<" + to_string(size) + ", " + to_string(axis) + ">()";
}
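A worked example of the size selection above, with invented shapes: for two standard float shapes {2, 6}, axis 1 has stride 1 and len 6, so 4 is rejected (it does not divide 6) and both inputs settle on 2.

#include <cassert>
// Hypothetical check (shapes invented); exercises the selection above.
void vectorize_example()
{
    shape a{shape::float_type, {2, 6}};
    shape b{shape::float_type, {2, 6}};
    auto v = vectorize::elements(1, {a, b});
    assert(v.str() == "vectorize<2, 1>()"); // 4 rejected since 6 % 4 != 0
}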
preload preload::broadcasts(std::size_t axis, const std::vector<shape>& inputs)
{
const std::size_t max_lds_bytes = 4096;
std::vector<bool> result;
std::transform(inputs.begin(),
inputs.end(),
std::back_inserter(result),
[&](const shape& input) { return input.strides()[axis] == 0; });
auto bytes = std::inner_product(inputs.begin(),
inputs.end(),
result.begin(),
std::size_t{0},
std::plus<>{},
[](const shape& s, bool b) -> std::size_t {
if(b)
return s.bytes();
return 0;
});
if(bytes < max_lds_bytes)
return {result};
// TODO: Try to partially preload items
std::fill(result.begin(), result.end(), false);
return {result};
}
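For instance, with invented sizes:

// inputs = { a 256-element float bias broadcast along the axis (stride 0,
// 1024 bytes), a {64, 256} float tensor (stride != 0) } gives
// result = {true, false}; 1024 < 4096, so the bias is staged in LDS.
// If the flagged inputs together exceeded the budget, the whole mask is
// cleared rather than partially preloaded, as the TODO notes.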
std::string preload::str() const
{
std::vector<std::string> bool_strs;
std::transform(args.begin(), std::prev(args.end()), std::back_inserter(bool_strs), [](bool b) {
if(b)
return "true";
return "false";
});
return "auto_preload<false, " + join_strings(bool_strs, ", ") + ">(idx)";
}
bool preload::is_preloading() const
{
return std::accumulate(args.begin(), args.end(), false, std::logical_or<>{});
}
std::size_t find_fast_axis(const std::vector<shape>& inputs)
{
auto permutation = find_permutation(inputs);
auto it = std::max_element(permutation.begin(), permutation.end());
return it - permutation.begin();
}
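The fast axis is read off the permutation; the exact semantics of find_permutation are assumed here:

// Sketch of the intent (assumed): for a standard shape the permutation is
// {0, 1, ..., n-1}, its maximum sits at the last position, and
// find_fast_axis returns that unit-stride axis.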
std::string make_transformer_args(std::vector<std::string> transformers)
{
return join_strings(std::move(transformers), ", ");
}
} // namespace gen
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#include <migraphx/gpu/compile_hip.hpp>
#include <migraphx/errors.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/compile_src.hpp>
#include <migraphx/process.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/env.hpp>
#include <cassert>
#include <iostream>
#if MIGRAPHX_USE_HIPRTC
#include <hip/hiprtc.h>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/env.hpp>
#else
#include <migraphx/compile_src.hpp>
#include <migraphx/process.hpp>
#endif
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);
#if MIGRAPHX_USE_HIPRTC
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_HIPRTC)
std::string hiprtc_error(hiprtcResult err, const std::string& msg)
{
return "hiprtc: " + (hiprtcGetErrorString(err) + (": " + msg));
}
void hiprtc_check_error(hiprtcResult err, const std::string& msg, const std::string& ctx)
{
if(err != HIPRTC_SUCCESS)
throw make_exception(ctx, hiprtc_error(err, msg));
}
#define MIGRAPHX_HIPRTC(...) \
hiprtc_check_error(__VA_ARGS__, #__VA_ARGS__, MIGRAPHX_MAKE_SOURCE_CTX())
#define MIGRAPHX_HIPRTC_THROW(error, msg) MIGRAPHX_THROW(hiprtc_error(error, msg))
// Workaround hiprtc's broken API
void hiprtc_program_destroy(hiprtcProgram prog) { hiprtcDestroyProgram(&prog); }
using hiprtc_program_ptr = MIGRAPHX_MANAGE_PTR(hiprtcProgram, hiprtc_program_destroy);
template <class... Ts>
hiprtc_program_ptr hiprtc_program_create(Ts... xs)
{
hiprtcProgram prog = nullptr;
auto result = hiprtcCreateProgram(&prog, xs...);
hiprtc_program_ptr p{prog};
if(result != HIPRTC_SUCCESS)
MIGRAPHX_HIPRTC_THROW(result, "Create program failed.");
return p;
}
struct hiprtc_program
{
struct string_array
{
std::vector<std::string> strings{};
std::vector<const char*> c_strs{};
string_array() {}
string_array(const string_array&) = delete;
std::size_t size() const { return strings.size(); }
const char** data() { return c_strs.data(); }
void push_back(std::string s)
{
strings.push_back(std::move(s));
c_strs.push_back(strings.back().c_str());
}
};
hiprtc_program_ptr prog = nullptr;
string_array headers{};
string_array include_names{};
std::string cpp_src = "";
std::string cpp_name = "";
hiprtc_program(const std::vector<src_file>& srcs)
{
for(auto&& src : srcs)
{
std::string content{src.content.first, src.content.second};
std::string path = src.path.string();
if(src.path.extension().string() == ".cpp")
{
cpp_src = std::move(content);
cpp_name = std::move(path);
}
else
{
headers.push_back(std::move(content));
include_names.push_back(std::move(path));
}
}
prog = hiprtc_program_create(cpp_src.c_str(),
cpp_name.c_str(),
headers.size(),
headers.data(),
include_names.data());
}
void compile(const std::vector<std::string>& options)
{
if(enabled(MIGRAPHX_TRACE_HIPRTC{}))
std::cout << "hiprtc " << join_strings(options, " ") << " " << cpp_name << std::endl;
std::vector<const char*> c_options;
std::transform(options.begin(),
options.end(),
std::back_inserter(c_options),
[](const std::string& s) { return s.c_str(); });
auto result = hiprtcCompileProgram(prog.get(), c_options.size(), c_options.data());
std::cerr << log() << std::endl;
if(result != HIPRTC_SUCCESS)
MIGRAPHX_HIPRTC_THROW(result, "Compilation failed.");
}
std::string log()
{
std::size_t n = 0;
MIGRAPHX_HIPRTC(hiprtcGetProgramLogSize(prog.get(), &n));
if(n < 2)
return {};
std::vector<char> buffer(n);
MIGRAPHX_HIPRTC(hiprtcGetProgramLog(prog.get(), buffer.data()));
assert(buffer.back() == 0);
return {buffer.begin(), buffer.end() - 1};
}
std::vector<char> get_code_obj()
{
std::size_t n = 0;
MIGRAPHX_HIPRTC(hiprtcGetCodeSize(prog.get(), &n));
std::vector<char> buffer(n);
MIGRAPHX_HIPRTC(hiprtcGetCode(prog.get(), buffer.data()));
return buffer;
}
};
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
{
hiprtc_program prog(srcs);
auto options = split_string(params, ' ');
if(enabled(MIGRAPHX_GPU_DEBUG{}))
options.push_back("-DMIGRAPHX_DEBUG");
if(std::none_of(options.begin(), options.end(), [](const std::string& s) {
return starts_with(s, "--std=") or starts_with(s, "-std=");
}))
options.push_back("-std=c++17");
options.push_back("-fno-gpu-rdc");
options.push_back(" -O" + string_value_of(MIGRAPHX_GPU_OPTIMIZE{}, "3"));
options.push_back("-Wno-cuda-compat");
options.push_back("--cuda-gpu-arch=" + arch);
prog.compile(options);
return {prog.get_code_obj()};
}
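A hedged usage sketch of this hiprtc path; the kernel string, flag, and gfx target are all invented:

// Hypothetical driver, mirroring the src_file construction that
// compile_hip_code_object uses further down.
std::vector<std::vector<char>> compile_noop_example()
{
    const std::string src = "__global__ void noop() {}";
    std::vector<src_file> srcs;
    srcs.push_back(src_file{fs::path{"main.cpp"},
                            std::make_pair(src.data(), src.data() + src.size())});
    return compile_hip_src(srcs, "-DMIGRAPHX_NLOCAL=256", "gfx908");
}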
#else // MIGRAPHX_USE_HIPRTC
bool is_hcc_compiler()
{
static const auto result = ends_with(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER), "hcc");
@@ -22,8 +180,18 @@ bool is_hip_clang_compiler()
return result;
}
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
bool has_compiler_launcher()
{
static const auto result = fs::exists(MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_LAUNCHER));
return result;
}
src_compiler assemble(src_compiler compiler)
{
compiler.out_ext = ".S";
compiler.flags = replace_string(compiler.flags, " -c", " -S");
return compiler;
}
std::vector<std::vector<char>>
compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std::string& arch)
@@ -57,6 +225,10 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
src_compiler compiler;
compiler.flags = params;
compiler.compiler = MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER);
#ifdef MIGRAPHX_HIP_COMPILER_LAUNCHER
if(has_compiler_launcher())
compiler.launcher = MIGRAPHX_STRINGIZE(MIGRAPHX_HIP_COMPILER_LAUNCHER);
#endif
if(is_hcc_compiler())
compiler.process = [&](const fs::path& obj_path) -> fs::path {
@@ -75,9 +247,34 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
MIGRAPHX_THROW("Missing hsaco");
};
if(enabled(MIGRAPHX_GPU_DUMP_SRC{}))
{
for(const auto& src : srcs)
{
if(src.path.extension() != ".cpp")
continue;
std::cout << std::string(src.content.first, src.len()) << std::endl;
}
}
if(enabled(MIGRAPHX_GPU_DUMP_ASM{}))
{
std::cout << assemble(compiler).compile(srcs).data() << std::endl;
}
return {compiler.compile(srcs)};
}
std::string enum_params(std::size_t count, std::string param)
{
std::vector<std::string> items(count);
transform(range(count), items.begin(), [&](auto i) { return param + std::to_string(i); });
return join_strings(items, ",");
}
#endif // MIGRAPHX_USE_HIPRTC
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -68,8 +68,72 @@ __content__
return replace_string(args_hpp, "__content__", inner);
}
const std::vector<std::string>& compiler_warnings()
{
static std::vector<std::string> warnings = {"-Weverything",
"-Wno-c++98-compat",
"-Wno-c++98-compat-pedantic",
"-Wno-conversion",
"-Wno-double-promotion",
"-Wno-exit-time-destructors",
"-Wno-extra-semi",
"-Wno-extra-semi-stmt",
"-Wno-float-conversion",
"-Wno-gnu-anonymous-struct",
"-Wno-gnu-zero-variadic-macro-arguments",
"-Wno-missing-prototypes",
"-Wno-nested-anon-types",
"-Wno-padded",
"-Wno-shorten-64-to-32",
"-Wno-sign-conversion",
"-Wno-sign-compare",
"-Wno-unused-command-line-argument",
"-Wno-weak-vtables",
"-Wno-c99-extensions"};
return warnings;
}
void hip_compile_options::set_launch_params(
const value& v,
const std::function<std::size_t(std::size_t local)>& compute_global,
std::size_t default_local)
{
local = v.get("local", default_local);
if(v.contains("global"))
global = v.at("global").to<std::size_t>();
else
global = compute_global(local);
}
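A small sketch of the lookup above; the value contents are invented:

// With v = {{"local", 256}}, local becomes 256 and global is derived via
// compute_global(256); a "global" key in v would take precedence instead.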
std::function<std::size_t(std::size_t local)>
compute_global_for(context& ctx, std::size_t n, std::size_t over)
{
assert(over > 0);
std::size_t max_global = ctx.get_current_device().get_cu_count() *
ctx.get_current_device().get_max_workitems_per_cu();
return [n, over, max_global](std::size_t local) {
std::size_t groups = (n + local - 1) / local;
std::size_t max_blocks = max_global / local;
std::size_t nglobal = std::min(max_blocks * over, groups) * local;
return nglobal;
};
}
std::size_t compute_block_size(std::size_t n, std::size_t max_block_size)
{
size_t block_size = 128;
while(block_size <= max_block_size and block_size <= n)
block_size *= 2;
return block_size / 2;
}
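Putting the two helpers together; the element count, block-size cap, and oversubscription factor are invented:

// Hypothetical launch-parameter selection for a 1M-element kernel.
std::pair<std::size_t, std::size_t> launch_params_example(context& ctx)
{
    std::size_t n      = 1 << 20;
    std::size_t local  = compute_block_size(n, 1024); // 1024: largest 2^k <= the cap
    std::size_t global = compute_global_for(ctx, n, 256)(local);
    return {global, local}; // global = min(groups, 256 * max_blocks) * local
}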
operation compile_hip_code_object(const std::string& content, hip_compile_options options)
{
assert(options.global > 0);
assert(options.local > 0);
assert(not options.inputs.empty());
assert(options.inputs.size() == options.virtual_inputs.size() or
options.virtual_inputs.empty());
std::vector<src_file> srcs;
std::transform(migraphx_kernels().begin(),
migraphx_kernels().end(),
@@ -83,12 +147,14 @@ operation compile_hip_code_object(const std::string& content, hip_compile_option
srcs.push_back(src_file{fs::path{"main.cpp"},
std::make_pair(content.data(), content.data() + content.size())});
auto args_hpp =
generate_args_hpp(options.reduced_inputs.empty() ? options.inputs : options.reduced_inputs);
generate_args_hpp(options.virtual_inputs.empty() ? options.inputs : options.virtual_inputs);
srcs.push_back(src_file{fs::path{"args.hpp"},
std::make_pair(args_hpp.data(), args_hpp.data() + args_hpp.size())});
options.params += " -DMIGRAPHX_NGLOBAL=" + std::to_string(options.global);
options.params += " -DMIGRAPHX_NLOCAL=" + std::to_string(options.local);
options.params += " -I.";
options.params += " " + join_strings(compiler_warnings(), " ");
options.params += " -ftemplate-backtrace-limit=0";
options.params += " -Werror";
auto cos = compile_hip_src(srcs, std::move(options.params), get_device_name());
if(cos.size() != 1)
MIGRAPHX_THROW("No code object");
......
#include <migraphx/gpu/compile_ops.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/module.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/par_for.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/gpu/compiler.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);
struct precompile_op
{
operation op = op::identity{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.op, "op"));
}
std::string name() const { return "gpu::precompile_op"; }
shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
{
inputs.pop_back();
return op.compute_shape(inputs, mods);
}
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
MIGRAPHX_REGISTER_OP(precompile_op);
struct compiled_result
{
compiler_replace replace;
instruction_ref ins;
};
template <class F>
void par_compile(std::size_t n, F f)
{
if(n == 0)
return;
par_for(n, n / value_of(MIGRAPHX_GPU_COMPILE_PARALLEL{}, n), f);
}
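How the parallelism knob plays out, assuming par_for treats its second argument as a per-thread grain size:

// With 8 pending compiles and MIGRAPHX_GPU_COMPILE_PARALLEL=4 the grain is
// 8/4 = 2, i.e. roughly four compile jobs in flight; with the variable
// unset the grain is n/n = 1, fully parallel.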
void compile_ops::apply(module& m) const
{
std::vector<std::function<compiled_result()>> compiles;
for(auto ins : iterator_for(m))
{
if(ins->name() != "gpu::precompile_op")
continue;
operation preop = any_cast<precompile_op>(ins->get_operator()).op;
compiles.emplace_back([=]() -> compiled_result {
return {compile(*ctx, ins, preop), ins};
});
}
std::vector<compiled_result> results(compiles.size());
par_compile(compiles.size(), [&](auto i) { results[i] = compiles[i](); });
for(const auto& cr : results)
{
cr.replace(m, cr.ins);
}
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx