Commit baac1dab authored by Alan Turner

Merge remote-tracking branch 'origin/develop' into ck-host-lib

parents 830dff7a 77042e30
......@@ -244,13 +244,13 @@ __device__ void print_once(Ts... xs)
template <class... Ts>
__device__ void println(Ts... xs)
{
print_each(&coutln, xs...);
print_each(&cout, xs..., '\n');
}
template <class... Ts>
__device__ void println_once(Ts... xs)
{
print_each_once(&coutln, xs...);
print_each_once(&cout, xs..., '\n');
}
} // namespace migraphx
......
......@@ -79,20 +79,21 @@ __device__ void dpp_reduce(T& in, Op op)
#endif
// NOLINTNEXTLINE
#define MIGRAPHX_DPP_REDUCE(op, prefix) \
#define MIGRAPHX_DPP_REDUCE(op, prefix, sign) \
__device__ inline void dpp_reduce(double& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_f64); } \
__device__ inline void dpp_reduce(float& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_f32); } \
__device__ inline void dpp_reduce(half& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_f16); } \
__device__ inline void dpp_reduce(int32_t& x, op) \
{ \
MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_u32); \
MIGRAPHX_DPP_REDUCE_ASM(x, prefix##sign##32); \
} \
__device__ inline void dpp_reduce(uint32_t& x, op) { MIGRAPHX_DPP_REDUCE_ASM(x, prefix##_u32); }
MIGRAPHX_DPP_REDUCE(op::sum, v_add)
MIGRAPHX_DPP_REDUCE(op::max, v_max)
MIGRAPHX_DPP_REDUCE(op::min, v_min)
MIGRAPHX_DPP_REDUCE(op::product, v_mul)
// Note: for max and min on int32_t, the signed version of the instruction must be used.
MIGRAPHX_DPP_REDUCE(op::sum, v_add, _u)
MIGRAPHX_DPP_REDUCE(op::product, v_mul, _u)
MIGRAPHX_DPP_REDUCE(op::max, v_max, _i)
MIGRAPHX_DPP_REDUCE(op::min, v_min, _i)
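As an aside, here is a minimal host-side sketch (plain C++, not part of this commit) of why the signed instruction matters for int32_t max/min: a negative int32_t reinterpreted as uint32_t compares as a very large value, so an unsigned max would select the wrong element.

#include <cstdint>
#include <cstdio>

int main()
{
    int32_t a = -1;
    int32_t b = 5;
    // Signed comparison picks 5; the same bits compared as unsigned pick -1,
    // because 0xFFFFFFFF is the largest uint32_t value.
    uint32_t ua = static_cast<uint32_t>(a);
    uint32_t ub = static_cast<uint32_t>(b);
    std::printf("signed max:   %d\n", a > b ? a : b);      // prints 5
    std::printf("unsigned max: %u\n", ua > ub ? ua : ub);  // prints 4294967295
    return 0;
}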
template <class Op, class T, class Index, class F>
__device__ auto block_reduce(index idx, Op op, T init, Index n, F f)
......@@ -174,6 +175,25 @@ struct inner_storage_tag
template <class T>
using is_inner_storage = is_base_of<inner_storage_tag, remove_cv_t<remove_reference_t<T>>>;
template <class Size, class F>
struct lazy_inner_storage : inner_storage_tag
{
using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
F f;
constexpr Size rsize() const { return {}; }
template <class U, class V>
constexpr auto operator()(U j, V d) const
{
return f(j, d);
}
};
template <class Size, class F>
constexpr lazy_inner_storage<Size, F> make_lazy_inner_storage(Size, F f)
{
return {{}, f};
}
template <class R, class F>
struct storage_access : F
{
......@@ -278,6 +298,14 @@ struct reducer_base
});
}
template <class F>
__device__ auto lazy_inner(F f) const
{
return this->inner_sliced([=](auto n, auto&&... xs) {
return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
});
}
template <class Op, class T, class Read>
__device__ auto reduce(Op op, T init, Read read) const
{
......@@ -396,25 +424,6 @@ struct block_large
index idx;
Slicer slice;
template <class Size, class F>
struct inner_storage : inner_storage_tag
{
using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
F f;
constexpr Size rsize() const { return {}; }
template <class U, class V>
constexpr auto operator()(U j, V d) const
{
return f(j, d);
}
};
template <class Size, class F>
constexpr inner_storage<Size, F> make_inner_storage(Size, F f)
{
return {f};
}
template <class Op, class T, class Read, class N, class... Ts>
__device__ auto reduce_impl(Op op, T init, Read read, N n, Ts&&... xs) const
{
......@@ -439,7 +448,7 @@ struct block_large
template <class R, class F, class N, class... Ts>
__device__ auto inner_impl(F f, N n, Ts&&... xs) const
{
return make_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
}
};
......@@ -469,25 +478,6 @@ struct lane
index idx;
Slicer slice;
template <class Size, class F>
struct inner_storage : inner_storage_tag
{
using type = remove_reference_t<decltype(declval<F>()(0, _c<0>))>;
F f;
constexpr Size rsize() const { return {}; }
template <class U, class V>
constexpr auto operator()(U j, V d) const
{
return f(j, d);
}
};
template <class Size, class F>
constexpr inner_storage<Size, F> make_inner_storage(Size, F f)
{
return {f};
}
template <class Op, class T, class Read, class N, class U, class... Us>
__device__ auto reduce_impl(Op op, T init, Read read, N n, U&& x, Us&&... xs) const
{
......@@ -518,7 +508,7 @@ struct lane
template <class R, class F, class N, class... Ts>
__device__ auto inner_impl(F f, N n, Ts&&... xs) const
{
return make_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
return make_lazy_inner_storage(n, [=](auto j, auto d) { return f(xs(j, d)...); });
}
};
template <class Slicer>
......@@ -577,5 +567,21 @@ simple_reduce(Op op, T init, Input input, Output output, ReadInput read, WriteOu
});
}
template <class Algo, class Reduced, class Output, class F>
__device__ void fused_reduce(Output output, F f)
{
Algo::template run<Reduced>([&](auto out_idx, auto r) {
auto result = f(r, out_idx);
if constexpr(reduce::is_inner_storage<decltype(result)>{})
{
r.inner([&](auto& y, auto x) { y = x; })(output, result);
}
else
{
r.outer([&] { output[out_idx] = implicit_conversion(result); });
}
});
}
} // namespace migraphx
#endif // MIGRAPHX_GUARD_KERNELS_REDUCE_HPP
......@@ -218,7 +218,15 @@ using common_type_t = typename common_type<Ts...>::type;
#define MIGRAPHX_REQUIRES(...) class = enable_if_t<__VA_ARGS__>
constexpr unsigned long int_max(unsigned long n) { return (1u << (n * 8)) - 1; }
constexpr unsigned long int_max(unsigned long n)
{
// Note: left shift cannot be used to get the maximum value of int64_type or
// uint64_type because left shifting by 64 bits is undefined behavior for
// these types
if(n == sizeof(int64_t))
return -1;
return (1ul << (n * 8)) - 1;
}
template <class T,
MIGRAPHX_REQUIRES(is_integral<T>{} or is_floating_point<T>{} or
......@@ -228,9 +236,9 @@ constexpr T numeric_max()
if constexpr(is_integral<T>{})
{
if constexpr(is_unsigned<T>{})
return int_max(sizeof(T)) * 2;
else
return int_max(sizeof(T));
else
return int_max(sizeof(T)) / 2;
}
else if constexpr(is_same<T, double>{})
return __DBL_MAX__;
......
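For reference, a standalone sketch (plain C++17, assuming only the standard library and not the MIGraphX headers) of how the int_max/numeric_max logic above behaves; the special case for 8-byte types avoids the undefined 64-bit shift by letting -1 convert to an all-ones unsigned long.

#include <cstdint>
#include <cstdio>

constexpr unsigned long int_max(unsigned long n)
{
    // Shifting a 64-bit value by 64 bits is undefined behavior, so the
    // 8-byte case returns -1, which converts to all ones (the max value).
    if(n == sizeof(int64_t))
        return -1;
    return (1ul << (n * 8)) - 1;
}

int main()
{
    std::printf("%lu\n", int_max(sizeof(uint32_t)));    // 4294967295
    std::printf("%lu\n", int_max(sizeof(int64_t)));     // 18446744073709551615
    std::printf("%lu\n", int_max(sizeof(int32_t)) / 2); // 2147483647 == INT32_MAX
    return 0;
}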
......@@ -135,7 +135,7 @@ constexpr vec<vec_type<T>, N> vec_packed_at(T x, I i)
return vec<T, N>{x};
else
{
MIGRAPHX_ASSERT((i + N) < vec_size<T>());
MIGRAPHX_ASSERT((i + N) <= vec_size<T>());
vec<vec_type<T>, N> result = {0};
for(int j = 0; j < N; j++)
{
......
......@@ -83,8 +83,7 @@ struct miopen_apply
auto& ctx = get_context();
int8_x4_format = get_int8_x4_format(ctx);
compute_fp32 = get_compute_fp32_flag();
offload_copy = (mod->name() == "main") ? pass->offload_copy : false;
offload_copy = (mod->name() == "main") ? pass->offload_copy : false;
add_generic_op("contiguous");
......@@ -112,6 +111,7 @@ struct miopen_apply
add_loop_op();
add_neg_op();
add_nms_op();
add_select_module_op();
}
void copy_params() const
......@@ -359,6 +359,20 @@ struct miopen_apply
return mod->replace_instruction(ins, gpu_out);
});
}
/**
* Adds a dynamic allocation for the submodule's output parameter.
*/
void add_select_module_op()
{
apply_map.emplace("select_module", [=](instruction_ref ins) {
auto s = ins->get_shape();
auto output = insert_allocation(ins, s);
std::vector<instruction_ref> inputs = ins->inputs();
inputs.push_back(output);
return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
});
}
};
void lowering::apply(module& m) const { miopen_apply{&m, this}.apply(); }
......
......@@ -30,6 +30,7 @@
#include <mlir-c/BuiltinTypes.h>
#include <mlir-c/Diagnostics.h>
#include <mlir-c/Dialect/MIGraphX.h>
#include <mlir-c/Dialect/Rock.h>
#include <mlir-c/IntegerSet.h>
#include <mlir-c/Pass.h>
#include <mutex>
......@@ -55,12 +56,16 @@
#include <migraphx/permutation.hpp>
#include <deque>
#include <variant>
#include <fstream>
#include <sstream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_MLIR);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_DB);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_CFG);
#ifdef MIGRAPHX_MLIR
template <class T, class F, F f> // NOLINT
......@@ -124,6 +129,8 @@ using mlir_op_printing_flags = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirOpPrintingFlags,
using mlir_region = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRegion, mlirRegionDestroy);
using mlir_block = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirBlock, mlirBlockDestroy);
using mlir_pass_manager = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirPassManager, mlirPassManagerDestroy);
using mlir_tuning_table = MIGRAPHX_MANAGE_MLIR_HANDLE(MlirRockTuningTable,
mlirRockTuningTableDestroy);
std::string_view to_string_view(MlirStringRef s) { return {s.data, s.length}; }
......@@ -157,10 +164,10 @@ std::string mlir_print(F f, T x)
return ss.str();
}
const std::unordered_set<std::string>& get_xdlops_archs()
bool has_xdlops(const std::string& target_arch)
{
static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
return supported_archs;
const auto device_name = trim(split_string(target_arch, ':').front());
return (starts_with(device_name, "gfx9") and device_name >= "gfx908");
}
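A quick illustration (assumed values, not taken from this commit) of how has_xdlops interprets a full target string: the device name is the part before the first ':', and the lexicographic comparison admits gfx908, gfx90a, and later gfx9 parts while rejecting earlier ones.

// Hypothetical standalone check mirroring the logic above (plain C++,
// using std::string instead of the MIGraphX string helpers).
#include <cassert>
#include <string>

bool has_xdlops_sketch(const std::string& target_arch)
{
    const std::string device_name = target_arch.substr(0, target_arch.find(':'));
    return device_name.rfind("gfx9", 0) == 0 and device_name >= "gfx908";
}

int main()
{
    assert(has_xdlops_sketch("gfx90a:sramecc+:xnack-")); // "gfx90a" >= "gfx908"
    assert(not has_xdlops_sketch("gfx906"));             // below gfx908
    assert(not has_xdlops_sketch("gfx1030"));            // not a gfx9 part
    return 0;
}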
struct mlir_program
......@@ -190,10 +197,14 @@ struct mlir_program
result = mlirF64TypeGet(ctx.get());
else if(as.is_integral())
{
if(as.is_signed())
result = mlirIntegerTypeSignedGet(ctx.get(), as.size() * 8);
else
result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
// Note: rocMLIR uses signless integer types for tensor types. This
// translates to a signed implementation for the currently supported
// operations.
if(as.is_unsigned())
{
MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));
}
result = mlirIntegerTypeGet(ctx.get(), as.size() * 8);
}
else
MIGRAPHX_THROW("Unsupported type: " + std::to_string(as.type_enum()));
......@@ -313,7 +324,8 @@ struct mlir_program
std::string,
value,
std::vector<value>,
MlirType>;
MlirType,
MlirAttribute>;
using named_attribute_t = std::pair<std::string_view, attribute_t>;
MlirNamedAttribute name_attribute(const named_attribute_t& na) const
......@@ -455,7 +467,7 @@ struct mlir_program
auto ops = create_operation_state("func.func");
ops.add_attributes({{"function_type", make_function_type(inputs, outputs)},
{"sym_name", std::string("main")},
{"sym_name", sym_name},
{"kernel", std::string("mixr")},
{"arch", target_arch}});
ops.add_region(std::move(region));
......@@ -470,13 +482,17 @@ struct mlir_program
{
if(ins->name() == "@return")
return "func.return";
if(ins->name() == "@literal")
{
return "tosa.const";
}
return "migraphx." + ins->name();
}
static value get_operator_value(const operation& op)
{
auto v = op.to_value();
if(op.name() == "convolution")
if(op.name() == "convolution" or op.name() == "quant_convolution")
{
// Adjust symmetrical padding
if(v.at("padding").size() == v.at("stride").size())
......@@ -498,31 +514,53 @@ struct mlir_program
return ins->get_shape();
}
static std::string get_symbol_name(const module& m)
{
for(auto ins : iterator_for(m))
{
if(ins->name() == "convolution" or ins->name() == "dot")
{
return "mlir_" + ins->name();
}
}
return "main";
}
void parse(const module& m)
{
sym_name = get_symbol_name(m);
auto mbody = mlirModuleGetBody(mmodule.get());
std::unordered_map<instruction_ref, MlirValue> ins_map;
auto fbody = insert(mbody, m, ins_map);
for(auto ins : iterator_for(m))
{
if(ins->name() == "@param")
continue;
if(ins->name() == "contiguous")
{
ins_map[ins] = ins_map[ins->inputs().at(0)];
continue;
}
auto name = get_name(ins);
auto ops = create_operation_state(name);
ops.add_attribute_value(get_operator_value(ins->get_operator()));
if(ins->name() != "@return")
ops.add_results({get_shape(ins)});
if(ins->name() == "convolution")
if(ins->name() == "@literal")
{
literal r = ins->get_literal();
MlirType tensor_type = make_tensor(ins->get_shape());
MlirAttribute mlir_value_attr =
mlirDenseElementsAttrRawBufferGet(tensor_type, r.get_shape().bytes(), r.data());
ops.add_attributes({{"value", mlir_value_attr}});
}
if(ins->name() == "convolution" or ins->name() == "dot")
{
pp =
problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
// check if HW supports xdlops
auto target_chip = trim(split_string(target_arch, ':').front());
bool xdlops = contains(get_xdlops_archs(), target_chip);
std::string tuned = get_tune_params(xdlops);
if(not tuned.empty())
ops.add_attributes({{"perf_config", tuned}});
if(xdlops)
if(has_xdlops(target_arch))
ops.add_attributes({{"xdlopsV2", true}});
}
......@@ -542,15 +580,19 @@ struct mlir_program
code_object_op compile() MIGRAPHX_TIDY_CONST
{
mlir_pass_manager pm{mlirPassManagerCreate(ctx.get())};
mlir_pass_manager pm_front{mlirPassManagerCreate(ctx.get())};
mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline(pm.get());
mlirMIGraphXAddHighLevelPipeline(pm_front.get());
mlirPassManagerRun(pm_front.get(), mmodule.get());
// 2nd pipeline to call
mlirMIGraphXAddBackendPipeline(pm.get(), target_arch.c_str());
mlirPassManagerRun(pm.get(), mmodule.get());
get_module_tuned();
mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
mlirPassManagerRun(pm_back.get(), mmodule.get());
code_object_op op{};
op.symbol_name = "main";
op.symbol_name = sym_name;
op.code_object = get_binary();
std::tie(op.global, op.local) = get_launch_params();
return op;
......@@ -578,7 +620,74 @@ struct mlir_program
MIGRAPHX_THROW("Failed to compile mlir program");
}
std::string get_tune_params(bool xdlops) { return get_mlir_perf_for_conv(pp, xdlops); }
std::string get_tune_params(bool xdlops) const { return get_mlir_perf_for_conv(pp, xdlops); }
// This function appends to a tuning cfg file that can be
// used with the rocMLIR tuning scripts.
void dump_tuning_cfg(const char* prob_config) const
{
std::string tuning_cfg_path = string_value_of(MIGRAPHX_MLIR_TUNING_CFG{});
if(!tuning_cfg_path.empty())
{
std::vector<std::string> tokens = split_string(prob_config, '\t');
std::string prob = tokens[1];
if(starts_with(prob, "conv"))
{
tuning_cfg_path += ".conv";
}
else
{
tuning_cfg_path += ".gemm";
}
std::ofstream tuning_cfg(tuning_cfg_path, std::ios::app);
tuning_cfg << prob << std::endl;
}
}
static mlir_tuning_table create_tuning_table()
{
mlir_tuning_table tuning_table{mlirRockTuningTableCreate()};
std::string tuning_db_path = string_value_of(MIGRAPHX_MLIR_TUNING_DB{});
if(!tuning_db_path.empty())
{
std::ifstream tuning_db_tsv(tuning_db_path);
if(tuning_db_tsv)
{
std::string line;
while(std::getline(tuning_db_tsv, line))
{
std::vector<std::string> tokens = split_string(line, '\t');
std::string arch = tokens[0];
std::string prob = tokens[1];
std::string perf = tokens[2];
std::string key = arch.append("\t").append(prob);
mlirRockTuningUpdateTable(tuning_table.get(), key.c_str(), perf.c_str(), 1.0);
}
}
}
else
{
std::cerr
<< "WARNING: MLIR tuning db not found. Please set MIGRAPHX_MLIR_TUNING_DB for "
"optimal performance."
<< std::endl;
}
return tuning_table;
}
bool get_module_tuned() const
{
static mlir_tuning_table tuning_table = create_tuning_table();
if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
{
const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
std::stringstream key(prob_config);
std::cerr << "fails to set param on" << prob_config << std::endl;
dump_tuning_cfg(prob_config);
return false;
}
return true;
}
mlir_context ctx;
MlirLocation location;
......@@ -586,6 +695,7 @@ struct mlir_program
problem_params pp;
std::deque<std::string> strings{};
std::string target_arch;
std::string sym_name;
};
std::string dump_mlir(const module& m)
......@@ -645,12 +755,13 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
{
adjust_param_shapes(m, inputs);
const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
if(trace)
std::cout << m << std::endl;
// Use a mutex to serialize compilation while LLVM thread support is disabled.
static std::mutex g_mlirc_mutex; // NOLINT
const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
if(trace)
std::cout << m << std::endl;
mlir_program mp;
mp.find_target();
mp.parse(m);
......
......@@ -47,32 +47,17 @@ rocblas_handle_ptr create_rocblas_handle_ptr(hipStream_t s)
return rb;
}
const std::unordered_set<std::string>& get_rocblas_fp32_archs()
{
static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
return supported_archs;
}
bool get_compute_fp32_flag()
{
bool compute_fp32 = false;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
const auto device_name = trim(split_string(get_device_name(), ':').front());
if(contains(get_rocblas_fp32_archs(), device_name))
compute_fp32 = true;
#endif
return compute_fp32;
return (starts_with(device_name, "gfx9") and device_name >= "gfx908");
}
bool get_int8_x4_format(context& ctx)
{
bool int8_x4_format = true;
#if ROCBLAS_VERSION_MAJOR >= 2 && ROCBLAS_VERSION_MINOR >= 38
rocblas_gemm_flags flag;
rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
int8_x4_format = (flag == rocblas_gemm_flags_pack_int8x4);
#endif
return int8_x4_format;
return flag == rocblas_gemm_flags_pack_int8x4;
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -26,13 +26,13 @@
#include <migraphx/check_context.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_allocation.hpp>
#include <migraphx/eliminate_common_subexpression.hpp>
#include <migraphx/eliminate_concat.hpp>
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/eliminate_data_type.hpp>
#include <migraphx/eliminate_identity.hpp>
#include <migraphx/eliminate_pad.hpp>
#include <migraphx/fuse_pointwise.hpp>
#include <migraphx/fuse_reduce.hpp>
#include <migraphx/inline_module.hpp>
#include <migraphx/insert_pad.hpp>
#include <migraphx/layout_nhwc.hpp>
......@@ -40,7 +40,7 @@
#include <migraphx/normalize_ops.hpp>
#include <migraphx/optimize_module.hpp>
#include <migraphx/preallocate_param.hpp>
#include <migraphx/propagate_constant.hpp>
#include <migraphx/promote_literals.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/replace_allocate.hpp>
#include <migraphx/rewrite_gelu.hpp>
......@@ -48,9 +48,9 @@
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/schedule.hpp>
#include <migraphx/simplify_algebra.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/split_single_dyn_dim.hpp>
#include <migraphx/gpu/allocation_model.hpp>
#include <migraphx/gpu/compile_miopen.hpp>
#include <migraphx/gpu/compile_ops.hpp>
......@@ -74,9 +74,8 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_SCHEDULE_PASS)
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_REDUCE_FUSION)
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_NHWC)
struct id_pass
{
std::string name() const { return "id"; }
......@@ -93,20 +92,24 @@ pass enable_pass(bool enabled, pass p)
std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options& options) const
{
auto& ctx = any_cast<context>(gctx);
ctx.set_exhaustive_tune_flag(options.exhaustive_tune);
std::set<shape::type_t> unsupported_types(shape::types().begin(), shape::types().end());
unsupported_types.erase(shape::type_t::float_type);
unsupported_types.erase(shape::type_t::half_type);
unsupported_types.erase(shape::type_t::bool_type);
unsupported_types.erase(shape::type_t::int8_type);
unsupported_types.erase(shape::type_t::uint8_type);
unsupported_types.erase(shape::type_t::int32_type);
unsupported_types.erase(shape::type_t::tuple_type);
// clang-format off
return
{
split_single_dyn_dim{},
dead_code_elimination{},
normalize_ops{},
dead_code_elimination{},
simplify_qdq{},
rewrite_quantization{},
enable_pass(not mlir_enabled(), rewrite_quantization{}),
dead_code_elimination{},
eliminate_data_type{unsupported_types, shape::type_t::float_type},
simplify_reshapes{},
......@@ -130,9 +133,11 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
fuse_ck_gemm_softmax_gemm{&ctx},
dead_code_elimination{},
optimize_module{},
enable_pass(not enabled(MIGRAPHX_DISABLE_POINTWISE_FUSION{}), fuse_pointwise{}),
fuse_pointwise{},
dead_code_elimination{},
fuse_mlir{&ctx},
enable_pass(not enabled(MIGRAPHX_DISABLE_REDUCE_FUSION{}), fuse_reduce{}),
dead_code_elimination{},
enable_pass(mlir_enabled(), fuse_mlir{&ctx}),
dead_code_elimination{},
fuse_ck{&ctx},
dead_code_elimination{},
......@@ -153,6 +158,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
dead_code_elimination{},
compile_ops{&ctx},
dead_code_elimination{},
promote_literals{},
dead_code_elimination{},
write_literals{&ctx},
schedule{gpu::schedule_model{ctx.get_current_device().nstreams()}, not enabled(MIGRAPHX_DISABLE_SCHEDULE_PASS{})},
memory_coloring{"hip::allocate"},
......
......@@ -31,10 +31,9 @@ set_target_properties(migraphx_ref PROPERTIES EXPORT_NAME ref)
rocm_set_soversion(migraphx_ref ${MIGRAPHX_SO_VERSION})
find_path(BLAZE_INCLUDE blaze/Blaze.h)
find_package(Threads)
rocm_clang_tidy_check(migraphx_ref)
target_link_libraries(migraphx_ref migraphx Threads::Threads)
target_link_libraries(migraphx_ref PUBLIC migraphx)
target_include_directories(migraphx_ref PRIVATE ${BLAZE_INCLUDE})
target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS)
......
......@@ -46,8 +46,6 @@ struct target
argument allocate(const shape& s) const;
};
MIGRAPHX_REGISTER_TARGET(target);
} // namespace ref
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -46,6 +46,7 @@ std::vector<std::string> get_op_parsers()
op_parser_map().end(),
std::back_inserter(result),
[&](auto&& p) { return p.first; });
std::sort(result.begin(), result.end());
return result;
}
......
......@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include <migraphx/tf/tf_parser.hpp>
#include <migraphx/tf/op_parser.hpp>
#include <iostream>
#include <fstream>
#include <unordered_map>
......@@ -62,5 +63,7 @@ program parse_tf(const std::string& name, const tf_options& options)
return std::move(parser.prog);
}
std::vector<std::string> get_tf_operators() { return tf::get_op_parsers(); }
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -24,4 +24,6 @@
// clang-format off
#define MIGRAPHX_VERSION_MAJOR @PROJECT_VERSION_MAJOR@
#define MIGRAPHX_VERSION_MINOR @PROJECT_VERSION_MINOR@
#define MIGRAPHX_VERSION_PATCH @PROJECT_VERSION_PATCH@
#define MIGRAPHX_VERSION_TWEAK @PROJECT_VERSION_TWEAK@
// clang-format on
......@@ -106,19 +106,11 @@ function(add_test_executable TEST_NAME)
if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
set_target_properties(${TEST_NAME} PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread)
endif()
separate_arguments(MIOPEN_TEST_FLAGS_ARGS UNIX_COMMAND ${MIOPEN_TEST_FLAGS})
if(MIOPEN_TEST_ALL)
set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} --all ${MIOPEN_TEST_FLAGS_ARGS})
else()
set(TEST_COMMAND ${TEST_NAME} ${MIOPEN_TEST_FLOAT_ARG} ${MIOPEN_TEST_FLAGS_ARGS})
endif()
set(TEST_COMMAND ${TEST_NAME})
add_test_command(${TEST_NAME} ${TEST_COMMAND})
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
target_link_libraries(${TEST_NAME} migraphx migraphx_ref migraphx_onnx)
target_link_libraries(${TEST_NAME} migraphx migraphx_onnx migraphx_ref)
target_include_directories(${TEST_NAME} PUBLIC include)
endfunction(add_test_executable)
......@@ -142,6 +134,9 @@ if(MIGRAPHX_ENABLE_GPU)
COST 10
RESOURCE_LOCK gpu
)
if(MIGRAPHX_USE_HIPRTC)
target_compile_definitions(test_gpu_${BASE_NAME} PUBLIC -DMIGRAPHX_USE_HIPRTC)
endif()
target_link_libraries(test_gpu_${BASE_NAME} migraphx_gpu migraphx_kernels)
endforeach()
endif()
......@@ -182,7 +177,7 @@ endforeach()
set(TEST_TF_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tf)
add_executable(test_tf tf/tf_test.cpp)
rocm_clang_tidy_check(test_tf)
target_link_libraries(test_tf migraphx_tf migraphx_ref)
target_link_libraries(test_tf migraphx_tf)
target_include_directories(test_tf PUBLIC include)
add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${TEST_TF_DIR})
add_dependencies(tests test_tf)
......@@ -216,10 +211,7 @@ function(test_headers PREFIX)
string(MAKE_C_IDENTIFIER ${HEADER_REL} TEST_NAME)
get_filename_component(BASE_NAME ${HEADER} NAME_WE)
test_header(header_${TEST_NAME} ${PREFIX}/${BASE_NAME}.hpp)
if(MIGRAPHX_ENABLE_GPU)
target_link_libraries(header_${TEST_NAME} migraphx_gpu)
endif()
target_link_libraries(header_${TEST_NAME} migraphx_all_targets)
endforeach()
endfunction()
......@@ -229,3 +221,10 @@ test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/r
if(MIGRAPHX_ENABLE_GPU)
test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
endif()
if(MIGRAPHX_ENABLE_CPU)
test_headers(migraphx/cpu ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp)
endif()
if(MIGRAPHX_ENABLE_FPGA)
test_headers(migraphx/fpga ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp)
endif()
......@@ -25,7 +25,7 @@ function(add_api_test TEST_NAME TEST_SRC TEST_DIR)
set(NAME test_api_${TEST_NAME})
add_executable(${NAME} EXCLUDE_FROM_ALL ${TEST_SRC})
rocm_clang_tidy_check(${NAME})
target_link_libraries(${NAME} migraphx_c migraphx)
target_link_libraries(${NAME} migraphx_c migraphx migraphx_all_targets)
target_include_directories(${NAME} PUBLIC ../include)
add_test(NAME ${NAME} COMMAND $<TARGET_FILE:${NAME}> WORKING_DIRECTORY ${TEST_DIR})
add_dependencies(tests ${NAME})
......@@ -48,6 +48,7 @@ add_api_test(assign test_assign.cpp ${TEST_ONNX_DIR})
add_api_test(compile_options test_compile_options.cpp ${TEST_ONNX_DIR})
add_api_test(lookup test_lookup.cpp ${TEST_ONNX_DIR})
add_api_test(module_construct test_module_construct.cpp ${TEST_ONNX_DIR})
add_api_test(dynamic_shape test_dynamic_shape.cpp ${TEST_ONNX_DIR})
add_api_test(ref test_cpu.cpp ${TEST_ONNX_DIR})
add_api_test(save_load test_save_load.cpp ${TEST_ONNX_DIR})
add_api_test(op test_op_construct.cpp ${TEST_ONNX_DIR})
......@@ -56,8 +57,10 @@ add_api_test(custom_op test_custom_op.cpp ${TEST_ONNX_DIR})
add_api_test(tf_parser test_tf_parser.cpp ${TEST_TF_DIR})
# GPU-based tests
if(MIGRAPHX_ENABLE_GPU)
list(APPEND CMAKE_PREFIX_PATH /opt/rocm)
find_package(hip)
add_api_test(gpu test_gpu.cpp ${TEST_ONNX_DIR})
target_link_libraries(test_api_gpu migraphx_gpu)
target_link_libraries(test_api_gpu)
add_api_test(custom_op_gpu test_custom_op_gpu.cpp ${TEST_ONNX_DIR})
target_link_libraries(test_api_custom_op_gpu migraphx_gpu)
target_link_libraries(test_api_custom_op_gpu)
endif()
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>
#include "test.hpp"
TEST_CASE(create_dynamic_dimensions)
{
migraphx::dynamic_dimension dd0{1, 4};
EXPECT(not dd0.is_fixed());
migraphx::dynamic_dimension dd1{4, 4};
EXPECT(dd1.is_fixed());
migraphx::optimals opts{1, 2, 4};
migraphx::dynamic_dimension dd2{1, 4, opts};
migraphx::dynamic_dimensions dyn_dims0{dd0, dd1, dd2};
CHECK(bool{dyn_dims0[0] == dd0});
CHECK(bool{dyn_dims0[1] == dd1});
CHECK(bool{dyn_dims0[2] == dd2});
CHECK(bool{dyn_dims0[2] != dd0});
EXPECT(dyn_dims0.size() == 3);
}
TEST_CASE(create_dynamic_shape)
{
migraphx::dynamic_dimensions dyn_dims(migraphx::dynamic_dimension{1, 4},
migraphx::dynamic_dimension{78, 92},
migraphx::dynamic_dimension{1, 4, {1, 4}});
migraphx::shape dyn_shape{migraphx_shape_float_type, dyn_dims};
CHECK(bool{dyn_shape.dynamic()});
CHECK(bool{dyn_shape.dyn_dims()[0] == migraphx::dynamic_dimension{1, 4}});
migraphx::shape static_shape{migraphx_shape_float_type, {3, 8}};
EXPECT(not static_shape.dynamic());
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -25,7 +25,6 @@
#include <hip/hip_runtime_api.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp>
#include <migraphx/manage_ptr.hpp>
#include "test.hpp"
......@@ -35,6 +34,7 @@ TEST_CASE(load_and_run)
auto shapes_before = p.get_output_shapes();
migraphx::compile_options options;
options.set_offload_copy();
options.set_exhaustive_tune_flag();
p.compile(migraphx::target("gpu"), options);
auto shapes_after = p.get_output_shapes();
CHECK(shapes_before.size() == 1);
......@@ -71,6 +71,105 @@ hip_ptr get_hip_buffer(size_t size)
return hip_ptr{ptr};
}
// TODO: placeholder until we have a way to copy tuple arguments to/from device through c++ api
// TEST_CASE(dynamic_batch_load_and_run)
//{
// migraphx::onnx_options o_options;
// migraphx::dynamic_dimensions dyn_dims = {{1, 4, {2, 4}}, {3, 3}, {4, 4}, {4, 4}};
// o_options.set_dyn_input_parameter_shape("0", dyn_dims);
// dyn_dims = {{2, 2}, {3, 3}, {3, 3}, {3, 3}};
// o_options.set_dyn_input_parameter_shape("1", dyn_dims);
// auto p = migraphx::parse_onnx("conv_dynamic_batch_test.onnx", o_options);
// migraphx::compile_options c_options;
// c_options.set_split_single_dyn_dim();
// p.compile(migraphx::target("gpu"), c_options);
// auto out_shapes = p.get_output_shapes();
// CHECK(out_shapes.size() == 1);
// EXPECT(out_shapes[0].dynamic());
//
// std::vector<float> a(2 * 3 * 4 * 4, 0.12);
// std::vector<float> c(2 * 3 * 3 * 3, 0.75);
//
// auto param_shapes = p.get_parameter_shapes();
// int batch_size = 2;
// std::unordered_map<std::string, migraphx::argument> arg_map;
//
// arg_map["0"] = migraphx::argument(param_shapes["0"].to_static(batch_size), a.data());
// arg_map["1"] = migraphx::argument(param_shapes["1"].to_static(batch_size), c.data());
//
// migraphx::program_parameters pp;
// std::vector<hip_ptr> buffs;
// std::vector<migraphx::argument> args;
//
// // copy to GPU and create parameter map
// for(auto&& name : param_shapes.names())
// {
// if(arg_map.find(name) != arg_map.end())
// {
// args.push_back(arg_map.at(name));
// }
// else
// {
// migraphx::shape static_shape = param_shapes[name].to_static(batch_size);
// auto output_arg = migraphx::argument(static_shape);
// args.push_back(output_arg);
// }
// buffs.push_back(get_hip_buffer(args.rbegin()->get_shape().bytes()));
// auto err = hipMemcpy(buffs.rbegin()->get(),
// args.rbegin()->data(),
// args.rbegin()->get_shape().bytes(),
// hipMemcpyHostToDevice);
// EXPECT(err == hipSuccess);
// pp.add(name, migraphx::argument(args.rbegin()->get_shape(), buffs.rbegin()->get()));
// }
//
// auto output = p.eval(pp)[0];
//
// // copy output back to host
// auto host_arg = migraphx::argument(output.get_shape());
// auto err = hipMemcpy(
// host_arg.data(), output.data(), output.get_shape().bytes(), hipMemcpyDeviceToHost);
// EXPECT(err == hipSuccess);
//}
TEST_CASE(dynamic_batch_load_and_run_offload)
{
migraphx::onnx_options o_options;
migraphx::dynamic_dimensions dyn_dims = {migraphx::dynamic_dimension{1, 4, {2, 4}},
migraphx::dynamic_dimension{3, 3},
migraphx::dynamic_dimension{4, 4},
migraphx::dynamic_dimension{4, 4}};
o_options.set_dyn_input_parameter_shape("0", dyn_dims);
dyn_dims = {migraphx::dynamic_dimension{2, 2},
migraphx::dynamic_dimension{3, 3},
migraphx::dynamic_dimension{3, 3},
migraphx::dynamic_dimension{3, 3}};
o_options.set_dyn_input_parameter_shape("1", dyn_dims);
auto p = migraphx::parse_onnx("conv_dynamic_batch_test.onnx", o_options);
auto shapes_before = p.get_output_shapes();
migraphx::compile_options c_options;
c_options.set_offload_copy();
p.compile(migraphx::target("gpu"), c_options);
auto out_shapes = p.get_output_shapes();
CHECK(out_shapes.size() == 1);
EXPECT(out_shapes[0].dynamic());
// batch size = 2
std::vector<float> a(2 * 3 * 4 * 4, 0.12);
std::vector<float> c(2 * 3 * 3 * 3, 0.75);
migraphx::program_parameters pp;
auto param_shapes = p.get_parameter_shapes();
pp.add("0",
migraphx::argument(migraphx::shape(migraphx_shape_float_type, {2, 3, 4, 4}), a.data()));
pp.add("1",
migraphx::argument(migraphx::shape(migraphx_shape_float_type, {2, 3, 3, 3}), c.data()));
auto outputs = p.eval(pp);
CHECK(shapes_before.size() == outputs.size());
CHECK(bool{outputs.front().get_shape() ==
migraphx::shape(migraphx_shape_float_type, {2, 1, 3, 3})});
}
TEST_CASE(load_and_run_async)
{
auto p = migraphx::parse_onnx("conv_relu_maxpool_test.onnx");
......
......@@ -36,7 +36,7 @@ bool create_shapes(bool dynamic_allowed)
try
{
shape a{shape::int64_type, {3}};
shape b{shape::float_type, {{3, 6, 0}, {4, 4, 0}}};
shape b{shape::float_type, {{3, 6}, {4, 4}}};
auto op = migraphx::make_op("add");
migraphx::check_shapes{{a, b}, op, dynamic_allowed}.has(2);
return true;
......
......@@ -26,7 +26,6 @@
#include <migraphx/make_op.hpp>
#include <migraphx/program.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/fpga/target.hpp>
#include <migraphx/target_assignments.hpp>
#include <migraphx/iterator_for.hpp>
......
......@@ -329,4 +329,36 @@ TEST_CASE(all_scalar_input)
EXPECT(p1 == p2);
}
TEST_CASE(no_input)
{
migraphx::program p;
{
auto* mm = p.get_main_module();
migraphx::shape g_shape{migraphx::shape::int64_type, {1}, {0}};
migraphx::shape s_indices{migraphx::shape::int32_type, {3}};
std::vector<int> indices{3, 800, 800};
auto a0 = mm->add_literal(migraphx::literal{s_indices, indices});
auto a1 = mm->add_literal(migraphx::literal{g_shape, {1}});
int axis = 0;
auto out = mm->add_instruction(migraphx::make_op("gather", {{"axis", axis}}), a0, a1);
mm->add_return({out});
}
run_pass(p);
// This should NOT create a pointwise module if there are no inputs here.
migraphx::program p2;
{
auto* mm = p2.get_main_module();
migraphx::shape g_shape{migraphx::shape::int64_type, {1}, {0}};
migraphx::shape s_indices{migraphx::shape::int32_type, {3}};
std::vector<int> indices{3, 800, 800};
auto a0 = mm->add_literal(migraphx::literal{s_indices, indices});
auto a1 = mm->add_literal(migraphx::literal{g_shape, {1}});
int axis = 0;
auto out = mm->add_instruction(migraphx::make_op("gather", {{"axis", axis}}), a0, a1);
mm->add_return({out});
}
EXPECT(p == p2);
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }