Unverified Commit 97d4bb6c authored by Ted Themistokleous, committed by GitHub

Merge branch 'develop' into add_parity_check_ci

parents 39b097c7 bdbc38bc
......@@ -216,6 +216,7 @@ struct find_mlir_op
"quant_dot",
"add",
"clip",
"relu",
"sub",
"mul",
"div",
......
......@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
shape compute_shape(const std::vector<shape>& inputs) const
{
check_shapes{inputs, *this}.has(2);
if(inputs.front().standard())
return inputs.front();
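        // note: shape{t, lens} below already yields the packed standard layout,
        // so the standard() early-return above was redundant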
auto lens = inputs.at(0).lens();
auto t = inputs.at(0).type();
return {t, lens};
......
......@@ -31,7 +31,7 @@
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/convolution_backwards.hpp>
#include <unordered_map>
#include <migraphx/reflect.hpp>
#include <migraphx/gpu/context.hpp>
......@@ -146,7 +146,8 @@ struct miopen_convolution
void set_conv_descriptor()
{
cd = (op.name() == "deconvolution") ? make_deconv(op) : make_conv(op);
cd =
(op.name() == "convolution_backwards") ? make_convolution_backwards(op) : make_conv(op);
}
value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
......@@ -159,10 +160,31 @@ struct miopen_convolution
shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
{
shape workspace_shape{};
auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
auto y_desc = make_tensor(reshape_if_1d(output_shape));
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
std::size_t workspace_size = 0;
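        // query the forward workspace size up front; both the Find 2.0 path and
        // the legacy find path below need it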
auto status = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle,
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x_shape = inputs[0];
auto w_shape = inputs[1];
if(int8_x4_format)
{
x_shape = pack_int8_shape(x_shape);
w_shape = pack_int8_shape(w_shape);
}
#ifdef MIGRAPHX_HAS_FIND_2_API
{
auto conv_problem = make_obj<miopen_problem>(
......@@ -170,13 +192,34 @@ struct miopen_convolution
set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
bool preallocate = false;
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
// MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
preallocate = true;
#endif
auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
auto y = preallocate ? allocate_gpu(output_shape) : inputs[2];
auto workspace =
preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
auto* miopen_stream_handle = ctx.get_stream().get_miopen();
const miopenTensorArgument_t tensor_args[3] = {
{miopenTensorConvolutionX, nullptr, x.implicit()},
{miopenTensorConvolutionW, nullptr, w.implicit()},
{miopenTensorConvolutionY, nullptr, y.implicit()},
};
solution_ptr = find_solution(miopen_stream_handle,
3,
tensor_args,
workspace.implicit(),
workspace_size,
conv_problem.get(),
ctx.get_exhaustive_tune_flag());
solution_ptr = find_solution(
miopen_stream_handle, conv_problem.get(), ctx.get_exhaustive_tune_flag());
auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");
......@@ -195,29 +238,10 @@ struct miopen_convolution
return shape{shape::int8_type, {workspace_size}};
}
#else
auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
w_desc.get(),
x_desc.get(),
cd.get(),
y_desc.get(),
&workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
workspace_shape = shape{shape::int8_type, {workspace_size}};
auto x_shape = inputs[0];
auto w_shape = inputs[1];
if(int8_x4_format)
{
x_shape = pack_int8_shape(x_shape);
w_shape = pack_int8_shape(w_shape);
}
auto x = to_gpu(generate_argument(x_shape));
auto w = to_gpu(generate_argument(w_shape));
auto y = allocate_gpu(output_shape);
auto workspace = allocate_gpu(workspace_shape);
int algo_count = 1;
miopenConvAlgoPerf_t perf;
status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
......@@ -337,6 +361,7 @@ struct miopen_convolution
return {s.type(), lens, strides};
}
};
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......
......@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
inline miopen_solution
find_solution(miopenHandle_t handle, miopenProblem_t problem, bool tune = false)
inline miopen_solution find_solution(miopenHandle_t handle,
size_t num_inputs,
const miopenTensorArgument_t* tensor_args,
void* workspace,
size_t workspace_size,
miopenProblem_t problem,
bool tune = false)
{
miopenSolution_t solution;
size_t found = 0;
miopen_find_options fo = nullptr;
miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
if(tune)
{
fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
miopenSetFindOptionTuning(fo.get(), 1);
}
auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
for(auto i : range(num_inputs))
{
auto status = miopenSetFindOptionPreallocatedTensor(
fo.get(), tensor_args[i].id, tensor_args[i].buffer);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
}
auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
if(status != miopenStatusSuccess)
MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
#else
miopenStatus_t status;
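    // the preallocation API is unavailable before rocm-5.6, so the buffer
    // arguments are deliberately unused on this path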
(void)(num_inputs);
(void)(tensor_args);
(void)(workspace_size);
(void)(workspace);
#endif
status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
auto result = miopen_solution{solution};
if(status != miopenStatusSuccess or found == 0)
MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
return result;
}
......@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
}
template <class T>
inline convolution_descriptor make_deconv(const T& op)
inline convolution_descriptor make_convolution_backwards(const T& op)
{
auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
miopenConvolutionMode_t c_mode = miopenTranspose;
......
......@@ -122,12 +122,14 @@ struct source_location_capture
{
T x;
source_location loc;
template <class U, class = decltype(T(U{}))>
// declval is a workaround since default-constructing "U" does not work with rocm-5.6
template <class U>
static U&& declval();
template <class U, class = decltype(T(declval<U>()))>
constexpr source_location_capture(U px, source_location ploc = source_location{})
: x(px), loc(ploc)
{
}
constexpr operator source_location() const { return loc; }
constexpr operator T() const { return x; }
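    // A minimal sketch of the workaround (assumed semantics, mirroring std::declval):
    // the old constraint decltype(T(U{})) value-initializes U, so it only works for
    // default-constructible U and tripped the rocm-5.6 toolchain; declval<U>() just
    // names a U&& in an unevaluated context, so no constructor is required:
    //
    //   struct src { src(int); };                 // hypothetical type, no default ctor
    //   using ok = decltype(T(declval<src>()));   // well-formed: no object is created
    //   // decltype(T(src{})) would be ill-formed: src{} needs a default constructor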
......
......@@ -106,7 +106,7 @@ struct miopen_apply
add_extend_op("topk");
add_convolution_op("convolution");
add_convolution_op("deconvolution");
add_convolution_op("convolution_backwards");
add_convolution_op("quant_convolution");
add_gemm_op<op::dot>("dot");
add_gemm_op<op::quant_dot>("quant_dot");
......
......@@ -389,14 +389,20 @@ struct mlir_program
mlir_operation_state& add_attributes(const std::vector<named_attribute_t>& named_attrs)
{
auto attributes = prog->name_attributes(named_attrs);
mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
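        // guard the call: std::vector::data() may return nullptr for an empty
        // vector, which the MLIR C API is not guaranteed to accept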
if(not attributes.empty())
{
mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
}
return *this;
}
mlir_operation_state& add_attribute_value(const value& v)
{
auto attributes = prog->name_attributes(v);
mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
if(not attributes.empty())
{
mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
}
return *this;
}
......@@ -419,13 +425,19 @@ struct mlir_program
return shape{r.type(), r.lens()};
});
auto x = prog->make_tensors(reshaped);
mlirOperationStateAddResults(&op_state, x.size(), x.data());
if(not x.empty())
{
mlirOperationStateAddResults(&op_state, x.size(), x.data());
}
return *this;
}
mlir_operation_state& add_operands(const std::vector<MlirValue>& inputs)
{
mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
if(not inputs.empty())
{
mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
}
return *this;
}
......@@ -435,7 +447,10 @@ struct mlir_program
std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) {
return r.get();
});
mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
if(not mregions.empty())
{
mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
}
mlir_operation op(mlirOperationCreate(&op_state));
// Release memory since mlir_operation owns it
for(auto& r : regions)
......@@ -607,12 +622,12 @@ struct mlir_program
mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline(pm_front.get());
mlirPassManagerRun(pm_front.get(), mmodule.get());
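    // the newer MLIR C API runs a pass manager on an explicit operation, so the
    // module is wrapped via mlirModuleGetOperation before each pipeline run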
mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()));
// 2nd pipeline to call
get_module_tuned();
mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
mlirPassManagerRun(pm_back.get(), mmodule.get());
mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get()));
code_object_op op{};
op.symbol_name = sym_name;
......@@ -701,6 +716,11 @@ struct mlir_program
bool get_module_tuned() const
{
static mlir_tuning_table tuning_table = create_tuning_table();
// The tuning table as currently implemented is not thread safe. This
// will be fixed in the future; for now, wrap all tuning-table
// interaction in a mutex.
static std::mutex lock;
std::lock_guard<std::mutex> guard(lock);
if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
{
const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
......@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
{
adjust_param_shapes(m, inputs);
const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
// set mutex while llvm thread support is disabled.
static std::mutex g_mlirc_mutex; // NOLINT
const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
if(trace)
std::cout << m << std::endl;
......
......@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()
bool get_int8_x4_format(context& ctx)
{
#if ROCBLAS_VERSION_MAJOR >= 3
(void)(ctx);
return false;
#else
// the int8x4 packed format is only available starting from rocblas-v2.38;
// it is deprecated in v3.0 and will be removed in v4.0
rocblas_gemm_flags flag;
rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
return flag == rocblas_gemm_flags_pack_int8x4;
#endif
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
......
......@@ -27,7 +27,7 @@
#include <migraphx/dfor.hpp>
#include <migraphx/op/identity.hpp>
#include <migraphx/op/convolution.hpp>
#include <migraphx/op/deconvolution.hpp>
#include <migraphx/op/convolution_backwards.hpp>
#include <migraphx/op/quant_convolution.hpp>
#include <migraphx/op/dot.hpp>
#include <migraphx/op/quant_dot.hpp>
......
......@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto x_type = args[0]->get_shape().type();
// unsqueeze tensors of shape (C) to broadcast correctly
auto rt = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
auto scale_unsqueeze =
......@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
auto var_unsqueeze =
info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[4]);
auto numer = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
auto denom = info.add_broadcastable_binary_op("pow", var_eps, rt);
auto div0 = info.add_broadcastable_binary_op("div", numer, denom);
auto r0 = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
auto x_sub_mean = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
auto rsqrt = info.add_instruction(make_op("rsqrt"), var_eps);
auto mul0 = info.add_broadcastable_binary_op("mul", scale_unsqueeze, rsqrt);
auto r0 = info.add_broadcastable_binary_op("mul", x_sub_mean, mul0);
return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
}
};
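// Note: the rewritten batchnorm above is algebraically identical to the old
// form but cheaper:
//   scale * (x - mean) / pow(var + eps, 0.5) + bias
//     == (x - mean) * (scale * rsqrt(var + eps)) + bias
// replacing the pow and div kernels with a single rsqrt, and letting
// scale * rsqrt(var + eps) fold into one broadcastable multiply.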
......
......@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
bool passed = true;
visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
double error;
passed = verify_range(ref, target, tolerance, &error);
passed = verify::verify_range(ref, target, tolerance, &error);
if(not passed)
{
// TODO: Check for nans
......@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
std::cout << "ref:" << ref << std::endl;
if(target.size() < 32)
std::cout << "target:" << target << std::endl;
if(range_zero(ref))
if(verify::range_zero(ref))
std::cout << "Ref data is all zeros" << std::endl;
if(range_zero(target))
if(verify::range_zero(target))
std::cout << "Target data is all zeros" << std::endl;
auto mxdiff = max_diff(ref, target);
auto mxdiff = verify::max_diff(ref, target);
std::cout << "Max diff: " << mxdiff << std::endl;
auto idx = mismatch_idx(ref, target, float_equal);
if(idx < range_distance(ref))
auto idx = verify::mismatch_idx(ref, target, float_equal);
if(idx < verify::range_distance(ref))
{
std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
<< std::endl;
}
auto ref_nan_idx = find_idx(ref, not_finite);
auto ref_nan_idx = find_idx(ref, verify::not_finite);
if(ref_nan_idx >= 0)
std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
<< ref[ref_nan_idx] << std::endl;
auto target_nan_idx = find_idx(target, not_finite);
auto target_nan_idx = find_idx(target, verify::not_finite);
if(target_nan_idx >= 0)
std::cout << "Non finite number found in target at " << target_nan_idx << ": "
<< target[target_nan_idx] << std::endl;
......@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
}
else
{
if(range_zero(ref))
if(verify::range_zero(ref))
std::cout << "Ref data is all zeros" << std::endl;
if(range_zero(target))
if(verify::range_zero(target))
std::cout << "Target data is all zeros" << std::endl;
// auto mxdiff = max_diff(ref, target);
// std::cout << "Max diff: " << mxdiff << std::endl;
// auto idx = mismatch_idx(ref, target, float_equal);
// if(idx < range_distance(ref))
// if(idx < verify::range_distance(ref))
// {
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// << std::endl;
// }
auto ref_nan_idx = find_idx(ref, not_finite);
auto ref_nan_idx = find_idx(ref, verify::not_finite);
if(ref_nan_idx >= 0)
std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
<< ref[ref_nan_idx] << std::endl;
auto target_nan_idx = find_idx(target, not_finite);
auto target_nan_idx = find_idx(target, verify::not_finite);
if(target_nan_idx >= 0)
std::cout << "Non finite number found in target at " << target_nan_idx << ": "
<< target[target_nan_idx] << std::endl;
......
......@@ -34,7 +34,6 @@ TEST_CASE(load_and_run)
auto shapes_before = p.get_output_shapes();
migraphx::compile_options options;
options.set_offload_copy();
options.set_exhaustive_tune_flag();
p.compile(migraphx::target("gpu"), options);
auto shapes_after = p.get_output_shapes();
CHECK(shapes_before.size() == 1);
......
......@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
migraphx::target gpu_t = migraphx::make_target("gpu");
run_prog(p, gpu_t, m, gpu_result);
EXPECT(migraphx::verify_range(ref_result, gpu_result));
EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
auto result = p.eval(pp).back();
std::vector<float> results_vector(ss.elements(), -1);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(c_vec, results_vector));
EXPECT(migraphx::verify::verify_range(c_vec, results_vector));
}
TEST_CASE(arguments_lifetime)
......
......@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
std::vector<int8_t> val_final;
ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });
EXPECT(migraphx::verify_range(val_orig, val_final));
EXPECT(migraphx::verify::verify_range(val_orig, val_final));
}
TEST_CASE(int8_quantization)
......@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
// the regular pipeline runs rewrite_quantization at a much
// earlier stage.
if(migraphx::gpu::mlir_enabled())
EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
EXPECT(migraphx::verify::verify_range(ref_result, gpu_result, 1e5));
else
EXPECT(migraphx::verify_range(ref_result, gpu_result));
EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
}
}
......
......@@ -34,13 +34,13 @@
#include <migraphx/literal.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/verify.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/functional.hpp>
#include <basic_ops.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/generate.hpp>
#include "test.hpp"
// check if it is custom_op or run_on_module operator
......@@ -180,38 +180,74 @@ TEST_CASE(multitarget_compile_cpu_gpu)
auto z_param = mm->add_parameter("z", s);
auto cpu_ins = mm->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 1}}), {x_param, y_param}, {cpu_mod});
auto cpu_ins_0 =
mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), cpu_ins);
auto gpu_ins = mm->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 0}}), {cpu_ins, z_param}, {gpu_mod});
mm->add_return({gpu_ins});
p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")});
migraphx::make_op("run_on_target", {{"target_id", 0}}), {cpu_ins_0, z_param}, {gpu_mod});
auto gpu_ins_0 =
mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), gpu_ins);
mm->add_return({gpu_ins_0});
migraphx::compile_options gpu_opts;
gpu_opts.offload_copy = true;
p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")}, {gpu_opts});
EXPECT(check_compiled_program(p, {migraphx::make_target("gpu"), migraphx::make_target("cpu")}));
migraphx::parameter_map params;
params["x"] = migraphx::fill_argument(s, 1);
params["y"] = migraphx::fill_argument(s, 2);
params["z"] = migraphx::fill_argument(s, 3);
auto result = p.eval(params).back();
auto gold = migraphx::fill_argument(s, 6);
EXPECT(gold == result);
}
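// The recurring pattern in these updated tests: "run_on_target" returns its
// submodule's results as a tuple, so each call site unpacks element 0 with
// "get_tuple_elem" before using the value (a minimal sketch, names hypothetical):
//
//   auto tup = mm->add_instruction(
//       migraphx::make_op("run_on_target", {{"target_id", 0}}), {arg}, {submod});
//   auto val = mm->add_instruction(
//       migraphx::make_op("get_tuple_elem", {{"index", 0}}), tup);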
TEST_CASE(single_target_compile)
TEST_CASE(single_target_multi_compile)
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
auto* mm = p.get_main_module();
auto boxes_param = mm->add_parameter("boxes", boxes_s);
auto* gpu_mod = p.create_module("gpu_mod");
auto boxes_param_gpu = gpu_mod->add_parameter("boxes_param_gpu", boxes_s);
migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
auto boxes_l = mm->add_parameter("boxes", boxes_s);
auto scores_l = mm->add_literal(migraphx::literal(scores_s, scores_vec));
auto max_out_l = mm->add_literal(int64_t{4});
auto iou_threshold = mm->add_literal(0.5f);
auto score_threshold = mm->add_literal(0.0f);
auto r = mm->add_instruction(migraphx::make_op("nonmaxsuppression", {{"center_point_box", 1}}),
boxes_l,
scores_l,
max_out_l,
iou_threshold,
score_threshold);
mm->add_return({r});
p.compile(migraphx::make_target("gpu"));
EXPECT(is_compiled_gpu_module(*p.get_main_module()));
auto scores_l = gpu_mod->add_literal(migraphx::literal(scores_s, scores_vec));
auto max_out_l = gpu_mod->add_literal(int64_t{4});
auto iou_threshold = gpu_mod->add_literal(0.5f);
auto score_threshold = gpu_mod->add_literal(0.0f);
auto r = gpu_mod->add_instruction(
migraphx::make_op("nonmaxsuppression",
{{"center_point_box", true}, {"use_dyn_output", true}}),
boxes_param_gpu,
scores_l,
max_out_l,
iou_threshold,
score_threshold);
gpu_mod->add_return({r});
auto run_on_gpu = mm->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 0}}), {boxes_param}, {gpu_mod});
auto run_on_gpu_0 =
mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_on_gpu);
mm->add_return({run_on_gpu_0});
// compile using multi-target compilation path
migraphx::compile_options gpu_opts;
gpu_opts.offload_copy = true;
// need to add "ref" to avoid an ambiguous call to "compile()"
p.compile({migraphx::make_target("gpu"), migraphx::make_target("ref")}, {gpu_opts});
EXPECT(check_compiled_program(p, {migraphx::make_target("gpu"), migraphx::make_target("ref")}));
// eval
migraphx::parameter_map params;
std::vector<float> boxes_vec = {0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, 0.5, 0.4, 1.0, 1.0,
0.5, 10.5, 1.0, 1.0, 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0};
params["boxes"] = migraphx::argument(boxes_s, boxes_vec.data());
auto output = p.eval(params).back();
std::vector<int64_t> gold_vec = {0, 0, 3, 0, 0, 0, 0, 0, 5};
auto gold =
migraphx::argument(migraphx::shape{migraphx::shape::int64_type, {3, 3}}, gold_vec.data());
EXPECT(output == gold);
}
TEST_CASE(multitarget_compile_if_then_else)
......@@ -224,54 +260,65 @@ TEST_CASE(multitarget_compile_if_then_else)
auto x = mm->add_parameter("x", ds);
auto y = mm->add_parameter("y", ds);
auto* then_mod = p.create_module("if_gpu_mod");
std::vector<float> data1 = {0.384804, -1.77948, -0.453775, 0.477438, -1.06333, -1.12893};
auto l1 = then_mod->add_literal(migraphx::literal(ds, data1));
auto a1 = then_mod->add_instruction(migraphx::make_op("add"), x, l1);
auto* then_mod = p.create_module("if_gpu_mod");
std::vector<float> data1(ds.elements(), 1);
auto l1 = then_mod->add_literal(migraphx::literal(ds, data1));
auto gpu_x = then_mod->add_parameter("gpu_x", ds);
auto a1 = then_mod->add_instruction(migraphx::make_op("add"), gpu_x, l1);
then_mod->add_return({a1});
auto* else_mod = p.create_module("else_cpu_mod");
std::vector<float> data2 = {-0.258047, 0.360394, 0.536804, -0.577762, 1.0217, 1.02442};
auto l2 = else_mod->add_literal(migraphx::literal(ds, data2));
auto a2 = else_mod->add_instruction(migraphx::make_op("mul"), y, l2);
auto* else_mod = p.create_module("else_cpu_mod");
std::vector<float> data2(ds.elements(), 2);
auto l2 = else_mod->add_literal(migraphx::literal(ds, data2));
auto cpu_y = else_mod->add_parameter("cpu_y", ds);
auto a2 = else_mod->add_instruction(migraphx::make_op("mul"), cpu_y, l2);
else_mod->add_return({a2});
auto* run_on_cpu_mod = p.create_module("run_on_cpu");
auto run_cpu_ins = run_on_cpu_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 1}}), {}, {else_mod});
run_on_cpu_mod->add_return({run_cpu_ins});
migraphx::make_op("run_on_target", {{"target_id", 1}}), {y}, {else_mod});
auto run_cpu_ins_0 = run_on_cpu_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_cpu_ins);
run_on_cpu_mod->add_return({run_cpu_ins_0});
auto* run_on_gpu_mod = p.create_module("run_on_gpu");
auto run_gpu_ins = run_on_gpu_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 0}}), {}, {then_mod});
run_on_gpu_mod->add_return({run_gpu_ins});
migraphx::make_op("run_on_target", {{"target_id", 0}}), {x}, {then_mod});
auto run_gpu_ins_0 = run_on_gpu_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_gpu_ins);
run_on_gpu_mod->add_return({run_gpu_ins_0});
auto ret =
mm->add_instruction(migraphx::make_op("if"), {cond}, {run_on_gpu_mod, run_on_cpu_mod});
auto r = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), ret);
mm->add_return({r});
// compile
p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")});
migraphx::compile_options gpu_opts;
gpu_opts.offload_copy = true;
p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")}, {gpu_opts});
EXPECT(check_compiled_program(p, {migraphx::make_target("gpu"), migraphx::make_target("cpu")}));
migraphx::parameter_map params;
params["x"] = migraphx::fill_argument(ds, 2);
params["y"] = migraphx::fill_argument(ds, 3);
for(bool cond_val : {true, false})
{
params["cond"] = migraphx::argument(cond_s, &cond_val);
auto result = p.eval(params).back();
auto gold = migraphx::fill_argument(ds, (cond_val ? 3 : 6));
EXPECT(gold == result);
}
}
// TODO: FPGA compilation is broken right now; the test below mentions fpga but doesn't compile for it
TEST_CASE(multitarget_compile_nested_if_then_else)
{
float seed = 0.0f;
std::mt19937 gen(seed);
std::uniform_real_distribution<> dis(0.0, 1.0);
auto get_random_values = [&](size_t elements) {
std::vector<float> rand_samples(elements);
std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); });
return rand_samples;
};
std::unordered_map<std::size_t, std::size_t> counter_map = {{0, 0}, {1, 0}};
migraphx::shape ds{migraphx::shape::float_type, {2, 3}};
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape cond_s{migraphx::shape::bool_type};
auto cond = mm->add_parameter("cond", cond_s);
auto cond_0 = mm->add_parameter("cond_0", cond_s);
auto cond_1 = mm->add_parameter("cond_1", cond_s);
auto x = mm->add_parameter("x", ds);
auto y = mm->add_parameter("y", ds);
auto z = mm->add_parameter("z", ds);
......@@ -280,20 +327,22 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
std::size_t tid) {
std::string mod_name =
"target_" + std::to_string(tid) + "_" + std::to_string(counter_map[tid]++);
auto* test_mod = prog.create_module(mod_name);
std::vector<float> data = get_random_values(ds.elements());
auto l1 = test_mod->add_literal(migraphx::literal(ds, data));
auto test_mod_param = test_mod->add_parameter(mod_name, ds);
// instruction with local literal and main_mod param as inputs
auto ins1 = test_mod->add_instruction(migraphx::make_op("add"), x, l1);
// instruction with local param and local ins as inputs
auto ins2 = test_mod->add_instruction(migraphx::make_op("mul"), ins1, test_mod_param);
// instruction with local ins and parent ins as inputs
auto ins3 = test_mod->add_instruction(migraphx::make_op("sub"), ins2, inputs.front());
auto* test_mod = prog.create_module(mod_name);
std::vector<float> data(ds.elements(), -1);
auto l1 = test_mod->add_literal(migraphx::literal(ds, data));
auto test_mod_param_0 = test_mod->add_parameter(mod_name + "_param_0", ds);
auto test_mod_param_1 = test_mod->add_parameter(mod_name + "_param_1", ds);
auto test_mod_param_2 = test_mod->add_parameter(mod_name + "_param_2", ds);
auto ins1 = test_mod->add_instruction(migraphx::make_op("add"), test_mod_param_0, l1);
auto ins2 = test_mod->add_instruction(migraphx::make_op("mul"), ins1, test_mod_param_1);
auto ins3 = test_mod->add_instruction(migraphx::make_op("sub"), ins2, test_mod_param_2);
test_mod->add_return({ins3});
auto* run_on_target_mod = prog.create_module("run_on_" + mod_name);
run_on_target_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", tid}}), {inputs.front()}, {test_mod});
auto run_ins = run_on_target_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", tid}}), inputs, {test_mod});
auto run_ins_0 = run_on_target_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_ins);
run_on_target_mod->add_return({run_ins_0});
return run_on_target_mod;
};
......@@ -307,15 +356,30 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
ref_mod->add_return({ref_add});
auto* then_mod = p.create_module("then_mod");
auto then_mod_param = then_mod->add_parameter("then_mod_param", ds);
auto then_mod_ref_ins = then_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 3}}), {then_mod_param, y}, {ref_mod});
auto then_mod_cond = then_mod->add_parameter("then_mod_cond", cond_s);
auto then_mod_param_0 = then_mod->add_parameter("then_mod_param_0", ds);
auto then_mod_param_1 = then_mod->add_parameter("then_mod_param_1", ds);
auto then_mod_param_2 = then_mod->add_parameter("then_mod_param_2", ds);
auto then_mod_ref_ins =
then_mod->add_instruction(migraphx::make_op("run_on_target", {{"target_id", 3}}),
{then_mod_param_0, then_mod_param_1},
{ref_mod});
auto then_mod_ref_ins_0 = then_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), then_mod_ref_ins);
then_mod->add_instruction(
auto then_mod_if = then_mod->add_instruction(
migraphx::make_op("if"),
{cond},
{create_test_module(p, {z}, 1), create_test_module(p, {then_mod_ref_ins_0}, 0)});
{then_mod_cond,
then_mod_param_0,
then_mod_param_1,
then_mod_param_2,
then_mod_ref_ins_0,
then_mod_param_1,
then_mod_param_2},
{create_test_module(p, {then_mod_param_0, then_mod_param_1, then_mod_param_2}, 1),
create_test_module(p, {then_mod_ref_ins_0, then_mod_param_1, then_mod_param_2}, 0)});
auto then_mod_if_0 =
then_mod->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), then_mod_if);
then_mod->add_return({then_mod_if_0});
// create nested else_mod with multiple targets.
// else_mod has one instruction that runs a module on "fpga" and another instruction that
......@@ -326,53 +390,105 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
auto fpga_add = fpga_mod->add_instruction(migraphx::make_op("add"), fpga_x, fpga_y);
fpga_mod->add_return({fpga_add});
auto* else_mod = p.create_module("else_mod");
auto else_mod_param = else_mod->add_parameter("else_mod_param", ds);
auto else_mod_fpga_ins = else_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 2}}), {else_mod_param, y}, {fpga_mod});
auto* else_mod = p.create_module("else_mod");
auto else_mod_cond = else_mod->add_parameter("else_mod_cond", cond_s);
auto else_mod_param_0 = else_mod->add_parameter("else_mod_param_0", ds);
auto else_mod_param_1 = else_mod->add_parameter("else_mod_param_1", ds);
auto else_mod_param_2 = else_mod->add_parameter("else_mod_param_2", ds);
auto else_mod_fpga_ins =
else_mod->add_instruction(migraphx::make_op("run_on_target", {{"target_id", 2}}),
{else_mod_param_0, else_mod_param_2},
{fpga_mod});
auto else_mod_fpga_ins_0 = else_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), else_mod_fpga_ins);
else_mod->add_instruction(migraphx::make_op("if"),
{cond},
{create_test_module(p, {else_mod_fpga_ins_0}, 0),
create_test_module(p, {else_mod_param}, 1)});
auto else_mod_if = else_mod->add_instruction(
migraphx::make_op("if"),
{else_mod_cond,
else_mod_fpga_ins_0,
else_mod_param_0,
else_mod_param_1,
else_mod_param_2,
else_mod_param_1,
else_mod_param_0},
{create_test_module(p, {else_mod_fpga_ins_0, else_mod_param_0, else_mod_param_1}, 0),
create_test_module(p, {else_mod_param_2, else_mod_param_1, else_mod_param_0}, 1)});
auto else_mod_if_0 =
else_mod->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), else_mod_if);
else_mod->add_return({else_mod_if_0});
// Create nested and multi-target main module using "If"
auto main_if_ins =
mm->add_instruction(migraphx::make_op("if"), {cond, x}, {then_mod, else_mod});
auto main_if_ins = mm->add_instruction(
migraphx::make_op("if"), {cond_0, cond_1, x, y, z, cond_1, x, y, z}, {then_mod, else_mod});
auto r = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), main_if_ins);
mm->add_return({r});
// compile
migraphx::compile_options gpu_opts;
gpu_opts.offload_copy = true;
p.compile({migraphx::make_target("gpu"),
migraphx::make_target("cpu"),
migraphx::make_target("fpga"),
migraphx::make_target("ref")});
migraphx::make_target("ref"),
migraphx::make_target("ref")},
{gpu_opts});
EXPECT(check_compiled_program(p,
{migraphx::make_target("gpu"),
migraphx::make_target("cpu"),
migraphx::make_target("fpga"),
migraphx::make_target("ref"),
migraphx::make_target("ref")}));
// do evaluation using different conditions
// TODO: make two conditionals to cover all the paths
migraphx::parameter_map params;
float x_i = 2.0;
float y_i = 3.0;
float z_i = 4.0;
params["x"] = migraphx::fill_argument(ds, x_i);
params["y"] = migraphx::fill_argument(ds, y_i);
params["z"] = migraphx::fill_argument(ds, z_i);
// cover all paths with different combination of conditions
std::vector<std::pair<bool, bool>> test_conds = {
{true, true}, {true, false}, {false, true}, {false, false}};
for(auto [cond_val_0, cond_val_1] : test_conds)
{
params["cond_0"] = migraphx::argument(cond_s, &cond_val_0);
params["cond_1"] = migraphx::argument(cond_s, &cond_val_1);
auto result = p.eval(params).back();
// main has a single if_then_else instruction
// then_mod computes: {tmp = x+y; (cond) ? (((x-1)*y)-z) : (((tmp-1)*y)-z);}
// else_mod computes: {tmp = x+z; (cond) ? (((tmp-1)*x)-y) : (((z-1)*y)-x);}
float gold_i = -1.0;
if(cond_val_0)
{
float tmp_i = x_i + y_i;
gold_i = (cond_val_1) ? (((x_i - 1) * y_i) - z_i) : (((tmp_i - 1) * y_i) - z_i);
}
else
{
float tmp_i = x_i + z_i;
gold_i = (cond_val_1) ? (((tmp_i - 1) * x_i) - y_i) : (((z_i - 1) * y_i) - x_i);
}
auto gold = migraphx::fill_argument(ds, gold_i);
EXPECT(gold == result);
}
}
// TODO: FPGA compilation is broken right now; the test below mentions fpga but doesn't compile for it
TEST_CASE(multitarget_select_module)
{
migraphx::program p;
auto* mm = p.get_main_module();
migraphx::shape lit_s{migraphx::shape{migraphx::shape::float_type, {1}}};
auto literal_ins = mm->add_literal(migraphx::literal{lit_s, {6}});
// create batch submodules
auto create_submodule = [&](std::size_t batch_size, const std::string& module_name) {
auto* submod = p.create_module(module_name);
migraphx::shape sm_shape{migraphx::shape::float_type, {batch_size, 4}};
auto sm_input = submod->add_parameter("data", sm_shape);
migraphx::shape lit_s{migraphx::shape{migraphx::shape::float_type, {1}}};
auto literal_ins = submod->add_literal(migraphx::literal{lit_s, {6}});
auto broadcast_lit =
submod->add_instruction(migraphx::make_op("multibroadcast"), literal_ins, sm_input);
auto add_ins0 = submod->add_instruction(migraphx::make_op("add"), sm_input, broadcast_lit);
auto add_ins1 = submod->add_instruction(migraphx::make_op("add"), add_ins0, broadcast_lit);
submod->add_return({add_ins0, add_ins1});
submod->add_return({add_ins1});
return submod;
};
auto* batch1 = create_submodule(1, "batch_1");
......@@ -380,36 +496,45 @@ TEST_CASE(multitarget_select_module)
auto* batch3 = create_submodule(3, "batch_3");
auto* batch4 = create_submodule(4, "batch_4");
migraphx::shape s{migraphx::shape::float_type, {{1, 4}, {4, 4}}};
auto input = mm->add_parameter("data", s);
auto* run_cpu_mod = p.create_module("cpu_mod");
auto cpu_param = run_cpu_mod->add_parameter(
"cpu_data", migraphx::shape{migraphx::shape::float_type, {1, 4}});
auto cpu_param =
run_cpu_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {1, 4}});
auto run_cpu_ins = run_cpu_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 1}}), {cpu_param}, {batch1});
run_cpu_mod->add_return({run_cpu_ins});
auto run_cpu_ins_0 = run_cpu_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_cpu_ins);
run_cpu_mod->add_return({run_cpu_ins_0});
auto* run_gpu_mod = p.create_module("gpu_mod");
auto gpu_param = run_gpu_mod->add_parameter(
"gpu_data", migraphx::shape{migraphx::shape::float_type, {2, 4}});
auto gpu_param =
run_gpu_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {2, 4}});
auto run_gpu_ins = run_gpu_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 0}}), {gpu_param}, {batch2});
run_gpu_mod->add_return({run_gpu_ins});
auto run_gpu_ins_0 = run_gpu_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_gpu_ins);
run_gpu_mod->add_return({run_gpu_ins_0});
auto* run_fpga_mod = p.create_module("fpga_mod");
auto fpga_param = run_fpga_mod->add_parameter(
"fpga_data", migraphx::shape{migraphx::shape::float_type, {3, 4}});
auto fpga_param =
run_fpga_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {3, 4}});
auto run_fpga_ins = run_fpga_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 2}}), {fpga_param}, {batch3});
run_fpga_mod->add_return({run_fpga_ins});
auto run_fpga_ins_0 = run_fpga_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_fpga_ins);
run_fpga_mod->add_return({run_fpga_ins_0});
auto* run_ref_mod = p.create_module("ref_mod");
auto ref_param = run_fpga_mod->add_parameter(
"ref_data", migraphx::shape{migraphx::shape::float_type, {4, 4}});
auto ref_param =
run_ref_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {4, 4}});
auto run_ref_ins = run_ref_mod->add_instruction(
migraphx::make_op("run_on_target", {{"target_id", 3}}), {ref_param}, {batch4});
run_ref_mod->add_return({run_ref_ins});
auto run_ref_ins_0 = run_ref_mod->add_instruction(
migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_ref_ins);
run_ref_mod->add_return({run_ref_ins_0});
auto* mm = p.get_main_module();
migraphx::shape dyn_s{migraphx::shape::float_type, {{1, 4}, {4, 4}}};
auto input = mm->add_parameter("data", dyn_s);
std::vector<migraphx::shape> sub_shapes = {};
sub_shapes.push_back(migraphx::shape{migraphx::shape::float_type, {{1, 4}, {4, 4}}});
sub_shapes.push_back(migraphx::shape{migraphx::shape::float_type, {{1, 4}, {4, 4}}});
......@@ -419,18 +544,34 @@ TEST_CASE(multitarget_select_module)
{input},
{run_cpu_mod, run_gpu_mod, run_fpga_mod, run_ref_mod});
auto ret0 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), sm_ins);
auto ret1 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 1}}), sm_ins);
mm->add_return({ret0, ret1});
mm->add_return({ret0});
// compile
migraphx::compile_options gpu_opts;
gpu_opts.offload_copy = true;
p.compile({migraphx::make_target("gpu"),
migraphx::make_target("cpu"),
migraphx::make_target("fpga"),
migraphx::make_target("ref")});
migraphx::make_target("ref"),
migraphx::make_target("ref")},
{gpu_opts});
EXPECT(check_compiled_program(p,
{migraphx::make_target("gpu"),
migraphx::make_target("cpu"),
migraphx::make_target("fpga"),
migraphx::make_target("ref"),
migraphx::make_target("ref")}));
// the program computes x + 12, where x has dynamic shape {{1, 4}, {4, 4}}
for(const size_t bs : {1, 2, 3, 4})
{
migraphx::shape arg_shape{migraphx::shape::float_type, {bs, 4}};
migraphx::parameter_map params;
params["data"] = migraphx::generate_argument(arg_shape, arg_shape.elements());
std::vector<float> input_data;
params["data"].visit([&](const auto& vec) { input_data.assign(vec.begin(), vec.end()); });
std::transform(input_data.begin(), input_data.end(), input_data.begin(), [](const auto& i) {
return i + 12.0;
});
auto result = p.eval(params).back();
EXPECT(migraphx::argument(arg_shape, input_data.data()) == result);
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......
(binary test artifacts omitted: conv_transpose_auto_pad_test — an ONNX model adding a ConvTranspose node with auto_pad = "SAME_UPPER"; deconv_bias_test renamed to conv_transpose_bias_test)
......