Unverified commit 97d4bb6c authored by Ted Themistokleous, committed by GitHub

Merge branch 'develop' into add_parity_check_ci

parents 39b097c7 bdbc38bc
@@ -216,6 +216,7 @@ struct find_mlir_op
         "quant_dot",
         "add",
         "clip",
+        "relu",
         "sub",
         "mul",
         "div",
...
@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
     shape compute_shape(const std::vector<shape>& inputs) const
     {
         check_shapes{inputs, *this}.has(2);
-        if(inputs.front().standard())
-            return inputs.front();
         auto lens = inputs.at(0).lens();
         auto t = inputs.at(0).type();
         return {t, lens};
...
@@ -31,7 +31,7 @@
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
 #include <migraphx/op/quant_convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <unordered_map>
 #include <migraphx/reflect.hpp>
 #include <migraphx/gpu/context.hpp>
@@ -146,7 +146,8 @@ struct miopen_convolution
     void set_conv_descriptor()
     {
-        cd = (op.name() == "deconvolution") ? make_deconv(op) : make_conv(op);
+        cd =
+            (op.name() == "convolution_backwards") ? make_convolution_backwards(op) : make_conv(op);
     }
     value compile(migraphx::context& ctx, const shape& output, const std::vector<shape>& input)
@@ -159,10 +160,31 @@ struct miopen_convolution
     shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
     {
         shape workspace_shape{};
         auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
         auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
         auto y_desc = make_tensor(reshape_if_1d(output_shape));
+        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
         std::size_t workspace_size = 0;
+        auto status = miopenConvolutionForwardGetWorkSpaceSize(miopen_stream_handle,
+                                                               w_desc.get(),
+                                                               x_desc.get(),
+                                                               cd.get(),
+                                                               y_desc.get(),
+                                                               &workspace_size);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
+        workspace_shape = shape{shape::int8_type, {workspace_size}};
+        auto x_shape = inputs[0];
+        auto w_shape = inputs[1];
+        if(int8_x4_format)
+        {
+            x_shape = pack_int8_shape(x_shape);
+            w_shape = pack_int8_shape(w_shape);
+        }
 #ifdef MIGRAPHX_HAS_FIND_2_API
         {
             auto conv_problem = make_obj<miopen_problem>(
@@ -170,13 +192,34 @@ struct miopen_convolution
             set_tensor_descriptor(miopenTensorConvolutionX, x_desc, conv_problem);
             set_tensor_descriptor(miopenTensorConvolutionW, w_desc, conv_problem);
+            bool preallocate = false;
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+            // MIOpen has APIs to pass pre-allocated buffers starting from rocm-5.6
+            preallocate = true;
+#endif
+            auto x = preallocate ? to_gpu(generate_argument(x_shape)) : inputs[0];
+            auto w = preallocate ? to_gpu(generate_argument(w_shape)) : inputs[1];
+            auto y = preallocate ? allocate_gpu(output_shape) : inputs[2];
+            auto workspace =
+                preallocate ? allocate_gpu(workspace_shape) : migraphx::argument(workspace_shape);
             set_tensor_descriptor(miopenTensorConvolutionY, y_desc, conv_problem);
-            auto* miopen_stream_handle = ctx.get_stream().get_miopen();
-            solution_ptr = find_solution(
-                miopen_stream_handle, conv_problem.get(), ctx.get_exhaustive_tune_flag());
-            auto status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
+            const miopenTensorArgument_t tensor_args[3] = {
+                {miopenTensorConvolutionX, nullptr, x.implicit()},
+                {miopenTensorConvolutionW, nullptr, w.implicit()},
+                {miopenTensorConvolutionY, nullptr, y.implicit()},
+            };
+            solution_ptr = find_solution(miopen_stream_handle,
+                                         3,
+                                         tensor_args,
+                                         workspace.implicit(),
+                                         workspace_size,
+                                         conv_problem.get(),
+                                         ctx.get_exhaustive_tune_flag());
+            status = miopenGetSolutionWorkspaceSize(solution_ptr.get(), &workspace_size);
             if(status != miopenStatusSuccess)
                 MIGRAPHX_THROW("MIOpen" + op.name() + " : failed to get solution's workspace size");
@@ -195,29 +238,10 @@ struct miopen_convolution
             return shape{shape::int8_type, {workspace_size}};
         }
 #else
-        auto status = miopenConvolutionForwardGetWorkSpaceSize(ctx.get_stream().get_miopen(),
-                                                               w_desc.get(),
-                                                               x_desc.get(),
-                                                               cd.get(),
-                                                               y_desc.get(),
-                                                               &workspace_size);
-        if(status != miopenStatusSuccess)
-            MIGRAPHX_THROW("MIOpen" + op.name() + " : Failed to get forward workspace size");
-        workspace_shape = shape{shape::int8_type, {workspace_size}};
-        auto x_shape = inputs[0];
-        auto w_shape = inputs[1];
-        if(int8_x4_format)
-        {
-            x_shape = pack_int8_shape(x_shape);
-            w_shape = pack_int8_shape(w_shape);
-        }
         auto x = to_gpu(generate_argument(x_shape));
         auto w = to_gpu(generate_argument(w_shape));
         auto y = allocate_gpu(output_shape);
         auto workspace = allocate_gpu(workspace_shape);
         int algo_count = 1;
         miopenConvAlgoPerf_t perf;
         status = miopenFindConvolutionForwardAlgorithm(ctx.get_stream().get_miopen(),
@@ -337,6 +361,7 @@ struct miopen_convolution
         return {s.type(), lens, strides};
     }
 };
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
...
@@ -75,21 +75,43 @@ using miopen_find_options = MIGRAPHX_MANAGE_PTR(miopenFindOptions_t, miopenDestr
 using miopen_problem = MIGRAPHX_MANAGE_PTR(miopenProblem_t, miopenDestroyProblem);
 using miopen_solution = MIGRAPHX_MANAGE_PTR(miopenSolution_t, miopenDestroySolution);
-inline miopen_solution
-find_solution(miopenHandle_t handle, miopenProblem_t problem, bool tune = false)
+inline miopen_solution find_solution(miopenHandle_t handle,
+                                     size_t num_inputs,
+                                     const miopenTensorArgument_t* tensor_args,
+                                     void* workspace,
+                                     size_t workspace_size,
+                                     miopenProblem_t problem,
+                                     bool tune = false)
 {
     miopenSolution_t solution;
     size_t found = 0;
-    miopen_find_options fo = nullptr;
+    miopen_find_options fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
     if(tune)
     {
-        fo = make_obj<miopen_find_options>(&miopenCreateFindOptions);
         miopenSetFindOptionTuning(fo.get(), 1);
     }
-    auto status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
+#ifdef MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS
+    for(auto i : range(num_inputs))
+    {
+        auto status = miopenSetFindOptionPreallocatedTensor(
+            fo.get(), tensor_args[i].id, tensor_args[i].buffer);
+        if(status != miopenStatusSuccess)
+            MIGRAPHX_THROW("MIOpen: failed to preallocate tensors for the find process");
+    }
+    auto status = miopenSetFindOptionPreallocatedWorkspace(fo.get(), workspace, workspace_size);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen: failed to preallocate workspace for the find process");
+#else
+    miopenStatus_t status;
+    (void)(num_inputs);
+    (void)(tensor_args);
+    (void)(workspace_size);
+    (void)(workspace);
+#endif
+    status = miopenFindSolutions(handle, problem, fo.get(), &solution, &found, 1);
     auto result = miopen_solution{solution};
     if(status != miopenStatusSuccess or found == 0)
-        MIGRAPHX_THROW("MIOpen miopenFindSolutions failed");
+        MIGRAPHX_THROW("MIOpen: miopenFindSolutions failed");
     return result;
 }
@@ -170,7 +192,7 @@ inline convolution_descriptor make_conv(const T& op)
 }
 template <class T>
-inline convolution_descriptor make_deconv(const T& op)
+inline convolution_descriptor make_convolution_backwards(const T& op)
 {
     auto c = make_obj<convolution_descriptor>(&miopenCreateConvolutionDescriptor);
     miopenConvolutionMode_t c_mode = miopenTranspose;
...
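For context on the new overload: with `MIGRAPHX_PREALLOCATE_MIOPEN_BUFFERS` defined (the MIOpen APIs exist starting with rocm-5.6), the caller registers real device buffers on the find options so MIOpen tunes against them instead of allocating internally. Below is a condensed sketch of the intended call pattern, mirroring the call site in `miopen_convolution::find` above; `handle`, `x`, `w`, `y`, `workspace`, `workspace_size`, and `problem` are assumed to be already set up, and error handling is elided:

```cpp
// Sketch only: drives the new find_solution overload from the hunk above.
const miopenTensorArgument_t tensor_args[3] = {
    {miopenTensorConvolutionX, nullptr, x.implicit()},
    {miopenTensorConvolutionW, nullptr, w.implicit()},
    {miopenTensorConvolutionY, nullptr, y.implicit()},
};
// find_solution forwards each {id, buffer} pair through
// miopenSetFindOptionPreallocatedTensor and the workspace through
// miopenSetFindOptionPreallocatedWorkspace before calling miopenFindSolutions.
auto solution = find_solution(handle,
                              3,
                              tensor_args,
                              workspace.implicit(),
                              workspace_size,
                              problem.get(),
                              /*tune=*/false);
```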
@@ -122,12 +122,14 @@ struct source_location_capture
 {
     T x;
     source_location loc;
-    template <class U, class = decltype(T(U{}))>
+    // declval is a workaround since default constructor for "U" is not working with rocm-5.6
+    template <class U>
+    static U&& declval();
+    template <class U, class = decltype(T(declval<U>()))>
     constexpr source_location_capture(U px, source_location ploc = source_location{})
         : x(px), loc(ploc)
     {
     }
     constexpr operator source_location() const { return loc; }
     constexpr operator T() const { return x; }
...
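The member `declval` plays the role of `std::declval`: the constructor's SFINAE constraint only needs the expression `T(u)` to be well-formed for a `U` value, without also requiring `U` to be default-constructible the way `decltype(T(U{}))` did. A minimal standalone illustration of the pattern, using a hypothetical `from_int` type that is not part of the diff:

```cpp
struct from_int
{
    from_int(int) {}
};

// Declared but never defined: only ever used in unevaluated contexts,
// exactly like std::declval.
template <class U>
U&& declval();

// Enabled only when a from_int is constructible from U. Note that U{} is
// never required to compile, so U need not be default-constructible.
template <class U, class = decltype(from_int(declval<U>()))>
constexpr bool constructible_from_u(U&&)
{
    return true;
}

int main() { return constructible_from_u(42) ? 0 : 1; }
```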
@@ -106,7 +106,7 @@ struct miopen_apply
         add_extend_op("topk");
         add_convolution_op("convolution");
-        add_convolution_op("deconvolution");
+        add_convolution_op("convolution_backwards");
         add_convolution_op("quant_convolution");
         add_gemm_op<op::dot>("dot");
         add_gemm_op<op::quant_dot>("quant_dot");
...
@@ -389,14 +389,20 @@ struct mlir_program
     mlir_operation_state& add_attributes(const std::vector<named_attribute_t>& named_attrs)
     {
         auto attributes = prog->name_attributes(named_attrs);
-        mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+        if(not attributes.empty())
+        {
+            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+        }
         return *this;
     }
     mlir_operation_state& add_attribute_value(const value& v)
     {
         auto attributes = prog->name_attributes(v);
-        mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+        if(not attributes.empty())
+        {
+            mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
+        }
         return *this;
     }
@@ -419,13 +425,19 @@ struct mlir_program
             return shape{r.type(), r.lens()};
         });
         auto x = prog->make_tensors(reshaped);
-        mlirOperationStateAddResults(&op_state, x.size(), x.data());
+        if(not x.empty())
+        {
+            mlirOperationStateAddResults(&op_state, x.size(), x.data());
+        }
         return *this;
     }
     mlir_operation_state& add_operands(const std::vector<MlirValue>& inputs)
     {
-        mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+        if(not inputs.empty())
+        {
+            mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
+        }
         return *this;
     }
@@ -435,7 +447,10 @@ struct mlir_program
         std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) {
             return r.get();
         });
-        mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+        if(not mregions.empty())
+        {
+            mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
+        }
         mlir_operation op(mlirOperationCreate(&op_state));
         // Release memory since mlir_operation owns it
         for(auto& r : regions)
@@ -607,12 +622,12 @@ struct mlir_program
         mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
         // 1st pipeline to call
         mlirMIGraphXAddHighLevelPipeline(pm_front.get());
-        mlirPassManagerRun(pm_front.get(), mmodule.get());
+        mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()));
         // 2nd pipeline to call
         get_module_tuned();
         mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
-        mlirPassManagerRun(pm_back.get(), mmodule.get());
+        mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get()));
         code_object_op op{};
         op.symbol_name = sym_name;
@@ -701,6 +716,11 @@ struct mlir_program
     bool get_module_tuned() const
     {
         static mlir_tuning_table tuning_table = create_tuning_table();
+        // The tuning table as currently implemented is not thread safe.
+        // This will be fixed in the future. For now, stick a mutex around
+        // all tuning table interaction.
+        static std::mutex lock;
+        std::lock_guard<std::mutex> guard(lock);
         if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
         {
             const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
 {
     adjust_param_shapes(m, inputs);
     const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
-    // set mutex while llvm thread support is disabled.
-    static std::mutex g_mlirc_mutex; // NOLINT
-    const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
    if(trace)
         std::cout << m << std::endl;
...
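The five guards above all have the same shape: the MLIR C-API state builders are now skipped when the span is empty, so the `data()` pointer of an empty `std::vector` is never handed across the C interface. A hypothetical helper that would factor out the repetition (not part of this change; assumes the mlir-c headers are in scope):

```cpp
// Invoke an MlirOperationState builder, e.g. mlirOperationStateAddOperands,
// only when there is at least one element to add.
template <class BuilderFn, class Vec>
void add_if_nonempty(BuilderFn add, MlirOperationState& state, const Vec& v)
{
    if(not v.empty())
        add(&state, v.size(), v.data());
}

// Usage: add_if_nonempty(mlirOperationStateAddOperands, op_state, inputs);
```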
@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()
 bool get_int8_x4_format(context& ctx)
 {
+#if ROCBLAS_VERSION_MAJOR >= 3
+    (void)(ctx);
+    return false;
+#else
+    // int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
+    // v3.0 and will be removed in v4.0
     rocblas_gemm_flags flag;
     rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
     return flag == rocblas_gemm_flags_pack_int8x4;
+#endif
 }
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
...
@@ -27,7 +27,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/quant_dot.hpp>
...
@@ -52,7 +52,6 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
         auto x_type = args[0]->get_shape().type();
         // unsqueeze tensors of shape (C) to broadcast correctly
-        auto rt = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {0.5}});
         auto eps = info.add_literal(migraphx::literal{migraphx::shape{x_type}, {epsilon}});
         auto scale_unsqueeze =
@@ -64,11 +63,11 @@ struct parse_batchnorm : op_parser<parse_batchnorm>
         auto var_unsqueeze =
             info.add_instruction(migraphx::make_op("unsqueeze", {{"axes", {1, 2}}}), args[4]);
-        auto numer = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
+        auto x_sub_mean = info.add_broadcastable_binary_op("sub", args[0], mean_unsqueeze);
         auto var_eps = info.add_broadcastable_binary_op("add", var_unsqueeze, eps);
-        auto denom = info.add_broadcastable_binary_op("pow", var_eps, rt);
-        auto div0 = info.add_broadcastable_binary_op("div", numer, denom);
-        auto r0 = info.add_broadcastable_binary_op("mul", div0, scale_unsqueeze);
+        auto rsqrt = info.add_instruction(make_op("rsqrt"), var_eps);
+        auto mul0 = info.add_broadcastable_binary_op("mul", scale_unsqueeze, rsqrt);
+        auto r0 = info.add_broadcastable_binary_op("mul", x_sub_mean, mul0);
         return info.add_broadcastable_binary_op("add", r0, bias_unsqueeze);
     }
 };
...
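The new lowering is a pure algebraic regrouping of the BatchNormalization formula: the `pow(var + eps, 0.5)` and the division are replaced by an `rsqrt` and two multiplies, which compute the same value:

```latex
y = \frac{x - \mu}{\sqrt{\sigma^2 + \epsilon}}\,\gamma + \beta
  = (x - \mu)\,\bigl(\gamma \cdot \operatorname{rsqrt}(\sigma^2 + \epsilon)\bigr) + \beta,
\qquad \operatorname{rsqrt}(t) = 1/\sqrt{t}
```

Grouping `scale * rsqrt(var + eps)` first also keeps that part of the computation on the small per-channel tensors before the broadcast against `x`.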
@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
     bool passed = true;
     visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
         double error;
-        passed = verify_range(ref, target, tolerance, &error);
+        passed = verify::verify_range(ref, target, tolerance, &error);
         if(not passed)
         {
             // TODO: Check for nans
@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
                 std::cout << "ref:" << ref << std::endl;
             if(target.size() < 32)
                 std::cout << "target:" << target << std::endl;
-            if(range_zero(ref))
+            if(verify::range_zero(ref))
                 std::cout << "Ref data is all zeros" << std::endl;
-            if(range_zero(target))
+            if(verify::range_zero(target))
                 std::cout << "Target data is all zeros" << std::endl;
-            auto mxdiff = max_diff(ref, target);
+            auto mxdiff = verify::max_diff(ref, target);
             std::cout << "Max diff: " << mxdiff << std::endl;
-            auto idx = mismatch_idx(ref, target, float_equal);
-            if(idx < range_distance(ref))
+            auto idx = verify::mismatch_idx(ref, target, float_equal);
+            if(idx < verify::range_distance(ref))
             {
                 std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
                           << std::endl;
             }
-            auto ref_nan_idx = find_idx(ref, not_finite);
+            auto ref_nan_idx = find_idx(ref, verify::not_finite);
             if(ref_nan_idx >= 0)
                 std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
                           << ref[ref_nan_idx] << std::endl;
-            auto target_nan_idx = find_idx(target, not_finite);
+            auto target_nan_idx = find_idx(target, verify::not_finite);
             if(target_nan_idx >= 0)
                 std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                           << target[target_nan_idx] << std::endl;
@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
         }
         else
         {
-            if(range_zero(ref))
+            if(verify::range_zero(ref))
                 std::cout << "Ref data is all zeros" << std::endl;
-            if(range_zero(target))
+            if(verify::range_zero(target))
                 std::cout << "Target data is all zeros" << std::endl;
             // auto mxdiff = max_diff(ref, target);
             // std::cout << "Max diff: " << mxdiff << std::endl;
             // auto idx = mismatch_idx(ref, target, float_equal);
-            // if(idx < range_distance(ref))
+            // if(idx < verify::range_distance(ref))
             // {
             //     std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
             //               << std::endl;
             // }
-            auto ref_nan_idx = find_idx(ref, not_finite);
+            auto ref_nan_idx = find_idx(ref, verify::not_finite);
             if(ref_nan_idx >= 0)
                 std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
                           << ref[ref_nan_idx] << std::endl;
-            auto target_nan_idx = find_idx(target, not_finite);
+            auto target_nan_idx = find_idx(target, verify::not_finite);
             if(target_nan_idx >= 0)
                 std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                           << target[target_nan_idx] << std::endl;
...
@@ -34,7 +34,6 @@ TEST_CASE(load_and_run)
     auto shapes_before = p.get_output_shapes();
     migraphx::compile_options options;
     options.set_offload_copy();
-    options.set_exhaustive_tune_flag();
     p.compile(migraphx::target("gpu"), options);
     auto shapes_after = p.get_output_shapes();
     CHECK(shapes_before.size() == 1);
...
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
     migraphx::target gpu_t = migraphx::make_target("gpu");
     run_prog(p, gpu_t, m, gpu_result);
-    EXPECT(migraphx::verify_range(ref_result, gpu_result));
+    EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
 }
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
     auto result = p.eval(pp).back();
     std::vector<float> results_vector(ss.elements(), -1);
     result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
-    EXPECT(migraphx::verify_range(c_vec, results_vector));
+    EXPECT(migraphx::verify::verify_range(c_vec, results_vector));
 }
 TEST_CASE(arguments_lifetime)
...
@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
     std::vector<int8_t> val_final;
     ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });
-    EXPECT(migraphx::verify_range(val_orig, val_final));
+    EXPECT(migraphx::verify::verify_range(val_orig, val_final));
 }
 TEST_CASE(int8_quantization)
@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
         // the regular pipeline uses the rewrite_quantization in the much
         // earlier stage.
         if(migraphx::gpu::mlir_enabled())
-            EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5));
+            EXPECT(migraphx::verify::verify_range(ref_result, gpu_result, 1e5));
         else
-            EXPECT(migraphx::verify_range(ref_result, gpu_result));
+            EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
     }
 }
...
@@ -34,13 +34,13 @@
 #include <migraphx/literal.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/shape.hpp>
-#include <migraphx/verify.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/functional.hpp>
 #include <basic_ops.hpp>
 #include <migraphx/compile_options.hpp>
 #include <migraphx/register_target.hpp>
+#include <migraphx/generate.hpp>
 #include "test.hpp"
 // check if it is custom_op or run_on_module operator
...
@@ -180,38 +180,74 @@ TEST_CASE(multitarget_compile_cpu_gpu)
     auto z_param = mm->add_parameter("z", s);
     auto cpu_ins = mm->add_instruction(
         migraphx::make_op("run_on_target", {{"target_id", 1}}), {x_param, y_param}, {cpu_mod});
+    auto cpu_ins_0 =
+        mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), cpu_ins);
     auto gpu_ins = mm->add_instruction(
-        migraphx::make_op("run_on_target", {{"target_id", 0}}), {cpu_ins, z_param}, {gpu_mod});
-    mm->add_return({gpu_ins});
-    p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")});
+        migraphx::make_op("run_on_target", {{"target_id", 0}}), {cpu_ins_0, z_param}, {gpu_mod});
+    auto gpu_ins_0 =
+        mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), gpu_ins);
+    mm->add_return({gpu_ins_0});
+    migraphx::compile_options gpu_opts;
+    gpu_opts.offload_copy = true;
+    p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")}, {gpu_opts});
     EXPECT(check_compiled_program(p, {migraphx::make_target("gpu"), migraphx::make_target("cpu")}));
+    migraphx::parameter_map params;
+    params["x"] = migraphx::fill_argument(s, 1);
+    params["y"] = migraphx::fill_argument(s, 2);
+    params["z"] = migraphx::fill_argument(s, 3);
+    auto result = p.eval(params).back();
+    auto gold = migraphx::fill_argument(s, 6);
+    EXPECT(gold == result);
 }
-TEST_CASE(single_target_compile)
+TEST_CASE(single_target_multi_compile)
 {
     migraphx::program p;
-    auto* mm = p.get_main_module();
     migraphx::shape boxes_s{migraphx::shape::float_type, {1, 6, 4}};
+    auto* mm = p.get_main_module();
+    auto boxes_param = mm->add_parameter("boxes", boxes_s);
+    auto* gpu_mod = p.create_module("gpu_mod");
+    auto boxes_param_gpu = gpu_mod->add_parameter("boxes_param_gpu", boxes_s);
     migraphx::shape scores_s{migraphx::shape::float_type, {1, 1, 6}};
     std::vector<float> scores_vec = {0.9, 0.75, 0.6, 0.95, 0.5, 0.3};
-    auto boxes_l = mm->add_parameter("boxes", boxes_s);
-    auto scores_l = mm->add_literal(migraphx::literal(scores_s, scores_vec));
-    auto max_out_l = mm->add_literal(int64_t{4});
-    auto iou_threshold = mm->add_literal(0.5f);
-    auto score_threshold = mm->add_literal(0.0f);
-    auto r = mm->add_instruction(migraphx::make_op("nonmaxsuppression", {{"center_point_box", 1}}),
-                                 boxes_l,
-                                 scores_l,
-                                 max_out_l,
-                                 iou_threshold,
-                                 score_threshold);
-    mm->add_return({r});
-    p.compile(migraphx::make_target("gpu"));
-    EXPECT(is_compiled_gpu_module(*p.get_main_module()));
+    auto scores_l = gpu_mod->add_literal(migraphx::literal(scores_s, scores_vec));
+    auto max_out_l = gpu_mod->add_literal(int64_t{4});
+    auto iou_threshold = gpu_mod->add_literal(0.5f);
+    auto score_threshold = gpu_mod->add_literal(0.0f);
+    auto r = gpu_mod->add_instruction(
+        migraphx::make_op("nonmaxsuppression",
+                          {{"center_point_box", true}, {"use_dyn_output", true}}),
+        boxes_param_gpu,
+        scores_l,
+        max_out_l,
+        iou_threshold,
+        score_threshold);
+    gpu_mod->add_return({r});
+    auto run_on_gpu = mm->add_instruction(
+        migraphx::make_op("run_on_target", {{"target_id", 0}}), {boxes_param}, {gpu_mod});
+    auto run_on_gpu_0 =
+        mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_on_gpu);
+    mm->add_return({run_on_gpu_0});
+    // compile using multi-target compilation path
+    migraphx::compile_options gpu_opts;
+    gpu_opts.offload_copy = true;
+    // need to add "ref" to avoid ambiguous call to "compile()"
+    p.compile({migraphx::make_target("gpu"), migraphx::make_target("ref")}, {gpu_opts});
+    EXPECT(check_compiled_program(p, {migraphx::make_target("gpu"), migraphx::make_target("ref")}));
+    // eval
+    migraphx::parameter_map params;
+    std::vector<float> boxes_vec = {0.5, 0.5, 1.0, 1.0, 0.5, 0.6, 1.0, 1.0, 0.5, 0.4, 1.0, 1.0,
+                                    0.5, 10.5, 1.0, 1.0, 0.5, 10.6, 1.0, 1.0, 0.5, 100.5, 1.0, 1.0};
+    params["boxes"] = migraphx::argument(boxes_s, boxes_vec.data());
+    auto output = p.eval(params).back();
+    std::vector<int64_t> gold_vec = {0, 0, 3, 0, 0, 0, 0, 0, 5};
+    auto gold =
+        migraphx::argument(migraphx::shape{migraphx::shape::int64_type, {3, 3}}, gold_vec.data());
+    EXPECT(output == gold);
 }
 TEST_CASE(multitarget_compile_if_then_else)
@@ -224,54 +260,65 @@ TEST_CASE(multitarget_compile_if_then_else)
     auto x = mm->add_parameter("x", ds);
     auto y = mm->add_parameter("y", ds);
     auto* then_mod = p.create_module("if_gpu_mod");
-    std::vector<float> data1 = {0.384804, -1.77948, -0.453775, 0.477438, -1.06333, -1.12893};
+    std::vector<float> data1(ds.elements(), 1);
     auto l1 = then_mod->add_literal(migraphx::literal(ds, data1));
-    auto a1 = then_mod->add_instruction(migraphx::make_op("add"), x, l1);
+    auto gpu_x = then_mod->add_parameter("gpu_x", ds);
+    auto a1 = then_mod->add_instruction(migraphx::make_op("add"), gpu_x, l1);
     then_mod->add_return({a1});
     auto* else_mod = p.create_module("else_cpu_mod");
-    std::vector<float> data2 = {-0.258047, 0.360394, 0.536804, -0.577762, 1.0217, 1.02442};
+    std::vector<float> data2(ds.elements(), 2);
     auto l2 = else_mod->add_literal(migraphx::literal(ds, data2));
-    auto a2 = else_mod->add_instruction(migraphx::make_op("mul"), y, l2);
+    auto cpu_y = else_mod->add_parameter("cpu_y", ds);
+    auto a2 = else_mod->add_instruction(migraphx::make_op("mul"), cpu_y, l2);
     else_mod->add_return({a2});
     auto* run_on_cpu_mod = p.create_module("run_on_cpu");
     auto run_cpu_ins = run_on_cpu_mod->add_instruction(
-        migraphx::make_op("run_on_target", {{"target_id", 1}}), {}, {else_mod});
-    run_on_cpu_mod->add_return({run_cpu_ins});
+        migraphx::make_op("run_on_target", {{"target_id", 1}}), {y}, {else_mod});
+    auto run_cpu_ins_0 = run_on_cpu_mod->add_instruction(
+        migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_cpu_ins);
+    run_on_cpu_mod->add_return({run_cpu_ins_0});
     auto* run_on_gpu_mod = p.create_module("run_on_gpu");
     auto run_gpu_ins = run_on_gpu_mod->add_instruction(
-        migraphx::make_op("run_on_target", {{"target_id", 0}}), {}, {then_mod});
-    run_on_gpu_mod->add_return({run_gpu_ins});
+        migraphx::make_op("run_on_target", {{"target_id", 0}}), {x}, {then_mod});
+    auto run_gpu_ins_0 = run_on_gpu_mod->add_instruction(
+        migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_gpu_ins);
+    run_on_gpu_mod->add_return({run_gpu_ins_0});
     auto ret =
         mm->add_instruction(migraphx::make_op("if"), {cond}, {run_on_gpu_mod, run_on_cpu_mod});
     auto r = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), ret);
     mm->add_return({r});
     // compile
-    p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")});
+    migraphx::compile_options gpu_opts;
+    gpu_opts.offload_copy = true;
+    p.compile({migraphx::make_target("gpu"), migraphx::make_target("cpu")}, {gpu_opts});
     EXPECT(check_compiled_program(p, {migraphx::make_target("gpu"), migraphx::make_target("cpu")}));
+    migraphx::parameter_map params;
+    params["x"] = migraphx::fill_argument(ds, 2);
+    params["y"] = migraphx::fill_argument(ds, 3);
+    for(bool cond_val : {true, false})
+    {
+        params["cond"] = migraphx::argument(cond_s, &cond_val);
+        auto result = p.eval(params).back();
+        auto gold = migraphx::fill_argument(ds, (cond_val ? 3 : 6));
+        EXPECT(gold == result);
+    }
 }
+// TODO : FPGA compilation is broken right now, below test mentions fpga but doesn't compile for it
 TEST_CASE(multitarget_compile_nested_if_then_else)
 {
-    float seed = 0.0f;
-    std::mt19937 gen(seed);
-    std::uniform_real_distribution<> dis(0.0, 1.0);
-    auto get_random_values = [&](size_t elements) {
-        std::vector<float> rand_samples(elements);
-        std::generate(rand_samples.begin(), rand_samples.end(), [&]() { return dis(gen); });
-        return rand_samples;
-    };
     std::unordered_map<std::size_t, std::size_t> counter_map = {{0, 0}, {1, 0}};
     migraphx::shape ds{migraphx::shape::float_type, {2, 3}};
     migraphx::program p;
     auto* mm = p.get_main_module();
     migraphx::shape cond_s{migraphx::shape::bool_type};
-    auto cond = mm->add_parameter("cond", cond_s);
+    auto cond_0 = mm->add_parameter("cond_0", cond_s);
+    auto cond_1 = mm->add_parameter("cond_1", cond_s);
     auto x = mm->add_parameter("x", ds);
     auto y = mm->add_parameter("y", ds);
     auto z = mm->add_parameter("z", ds);
@@ -280,20 +327,22 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
                                   std::size_t tid) {
         std::string mod_name =
             "target_" + std::to_string(tid) + "_" + std::to_string(counter_map[tid]++);
         auto* test_mod = prog.create_module(mod_name);
-        std::vector<float> data = get_random_values(ds.elements());
+        std::vector<float> data(ds.elements(), -1);
         auto l1 = test_mod->add_literal(migraphx::literal(ds, data));
-        auto test_mod_param = test_mod->add_parameter(mod_name, ds);
-        // instruction with local literal and main_mod param as inputs
-        auto ins1 = test_mod->add_instruction(migraphx::make_op("add"), x, l1);
-        // instructinon with local param and local ins as inputs
-        auto ins2 = test_mod->add_instruction(migraphx::make_op("mul"), ins1, test_mod_param);
-        // instruction with local ins and parent ins as inputs
-        auto ins3 = test_mod->add_instruction(migraphx::make_op("sub"), ins2, inputs.front());
+        auto test_mod_param_0 = test_mod->add_parameter(mod_name + "_param_0", ds);
+        auto test_mod_param_1 = test_mod->add_parameter(mod_name + "_param_1", ds);
+        auto test_mod_param_2 = test_mod->add_parameter(mod_name + "_param_2", ds);
+        auto ins1 = test_mod->add_instruction(migraphx::make_op("add"), test_mod_param_0, l1);
+        auto ins2 = test_mod->add_instruction(migraphx::make_op("mul"), ins1, test_mod_param_1);
+        auto ins3 = test_mod->add_instruction(migraphx::make_op("sub"), ins2, test_mod_param_2);
         test_mod->add_return({ins3});
         auto* run_on_target_mod = prog.create_module("run_on_" + mod_name);
-        run_on_target_mod->add_instruction(
-            migraphx::make_op("run_on_target", {{"target_id", tid}}), {inputs.front()}, {test_mod});
+        auto run_ins = run_on_target_mod->add_instruction(
+            migraphx::make_op("run_on_target", {{"target_id", tid}}), inputs, {test_mod});
+        auto run_ins_0 = run_on_target_mod->add_instruction(
+            migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_ins);
+        run_on_target_mod->add_return({run_ins_0});
        return run_on_target_mod;
     };
@@ -307,15 +356,30 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
     ref_mod->add_return({ref_add});
     auto* then_mod = p.create_module("then_mod");
-    auto then_mod_param = then_mod->add_parameter("then_mod_param", ds);
-    auto then_mod_ref_ins = then_mod->add_instruction(
-        migraphx::make_op("run_on_target", {{"target_id", 3}}), {then_mod_param, y}, {ref_mod});
+    auto then_mod_cond = then_mod->add_parameter("then_mod_cond", cond_s);
+    auto then_mod_param_0 = then_mod->add_parameter("then_mod_param_0", ds);
+    auto then_mod_param_1 = then_mod->add_parameter("then_mod_param_1", ds);
+    auto then_mod_param_2 = then_mod->add_parameter("then_mod_param_2", ds);
+    auto then_mod_ref_ins =
+        then_mod->add_instruction(migraphx::make_op("run_on_target", {{"target_id", 3}}),
+                                  {then_mod_param_0, then_mod_param_1},
+                                  {ref_mod});
     auto then_mod_ref_ins_0 = then_mod->add_instruction(
         migraphx::make_op("get_tuple_elem", {{"index", 0}}), then_mod_ref_ins);
-    then_mod->add_instruction(
+    auto then_mod_if = then_mod->add_instruction(
         migraphx::make_op("if"),
-        {cond},
-        {create_test_module(p, {z}, 1), create_test_module(p, {then_mod_ref_ins_0}, 0)});
+        {then_mod_cond,
+         then_mod_param_0,
+         then_mod_param_1,
+         then_mod_param_2,
+         then_mod_ref_ins_0,
+         then_mod_param_1,
+         then_mod_param_2},
+        {create_test_module(p, {then_mod_param_0, then_mod_param_1, then_mod_param_2}, 1),
+         create_test_module(p, {then_mod_ref_ins_0, then_mod_param_1, then_mod_param_2}, 0)});
+    auto then_mod_if_0 =
+        then_mod->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), then_mod_if);
+    then_mod->add_return({then_mod_if_0});
     // create nested else_mod with multiple targets.
     // else_mod has one instruction that runs a module on "fpga" and another instruction that
@@ -326,53 +390,105 @@ TEST_CASE(multitarget_compile_nested_if_then_else)
     auto fpga_add = fpga_mod->add_instruction(migraphx::make_op("add"), fpga_x, fpga_y);
     fpga_mod->add_return({fpga_add});
     auto* else_mod = p.create_module("else_mod");
-    auto else_mod_param = else_mod->add_parameter("else_mod_param", ds);
-    auto else_mod_fpga_ins = else_mod->add_instruction(
-        migraphx::make_op("run_on_target", {{"target_id", 2}}), {else_mod_param, y}, {fpga_mod});
+    auto else_mod_cond = else_mod->add_parameter("else_mod_cond", cond_s);
+    auto else_mod_param_0 = else_mod->add_parameter("else_mod_param_0", ds);
+    auto else_mod_param_1 = else_mod->add_parameter("else_mod_param_1", ds);
+    auto else_mod_param_2 = else_mod->add_parameter("else_mod_param_2", ds);
+    auto else_mod_fpga_ins =
+        else_mod->add_instruction(migraphx::make_op("run_on_target", {{"target_id", 2}}),
+                                  {else_mod_param_0, else_mod_param_2},
+                                  {fpga_mod});
     auto else_mod_fpga_ins_0 = else_mod->add_instruction(
         migraphx::make_op("get_tuple_elem", {{"index", 0}}), else_mod_fpga_ins);
-    else_mod->add_instruction(migraphx::make_op("if"),
-                              {cond},
-                              {create_test_module(p, {else_mod_fpga_ins_0}, 0),
-                               create_test_module(p, {else_mod_param}, 1)});
+    auto else_mod_if = else_mod->add_instruction(
+        migraphx::make_op("if"),
+        {else_mod_cond,
+         else_mod_fpga_ins_0,
+         else_mod_param_0,
+         else_mod_param_1,
+         else_mod_param_2,
+         else_mod_param_1,
+         else_mod_param_0},
+        {create_test_module(p, {else_mod_fpga_ins_0, else_mod_param_0, else_mod_param_1}, 0),
+         create_test_module(p, {else_mod_param_2, else_mod_param_1, else_mod_param_0}, 1)});
+    auto else_mod_if_0 =
+        else_mod->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), else_mod_if);
+    else_mod->add_return({else_mod_if_0});
     // Create nested and multi-target main module using "If"
-    auto main_if_ins =
-        mm->add_instruction(migraphx::make_op("if"), {cond, x}, {then_mod, else_mod});
+    auto main_if_ins = mm->add_instruction(
+        migraphx::make_op("if"), {cond_0, cond_1, x, y, z, cond_1, x, y, z}, {then_mod, else_mod});
     auto r = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), main_if_ins);
     mm->add_return({r});
     // compile
+    migraphx::compile_options gpu_opts;
+    gpu_opts.offload_copy = true;
     p.compile({migraphx::make_target("gpu"),
                migraphx::make_target("cpu"),
-               migraphx::make_target("fpga"),
-               migraphx::make_target("ref")});
+               migraphx::make_target("ref"),
+               migraphx::make_target("ref")},
+              {gpu_opts});
     EXPECT(check_compiled_program(p,
                                   {migraphx::make_target("gpu"),
                                    migraphx::make_target("cpu"),
-                                   migraphx::make_target("fpga"),
+                                   migraphx::make_target("ref"),
                                    migraphx::make_target("ref")}));
+    // do evaluation using different conditions
+    // TODO: make two conditionals to cover all the paths
+    migraphx::parameter_map params;
+    float x_i = 2.0;
+    float y_i = 3.0;
+    float z_i = 4.0;
+    params["x"] = migraphx::fill_argument(ds, x_i);
+    params["y"] = migraphx::fill_argument(ds, y_i);
+    params["z"] = migraphx::fill_argument(ds, z_i);
+    // cover all paths with different combinations of conditions
+    std::vector<std::pair<bool, bool>> test_conds = {
+        {true, true}, {true, false}, {false, true}, {false, false}};
+    for(auto [cond_val_0, cond_val_1] : test_conds)
+    {
+        params["cond_0"] = migraphx::argument(cond_s, &cond_val_0);
+        params["cond_1"] = migraphx::argument(cond_s, &cond_val_1);
+        auto result = p.eval(params).back();
+        // main has one instruction that is : if_then_else
+        // then mod is doing : {tmp = x+y; (cond) ? (((x-1)*y)-z) : (((tmp-1)*y)-z);}
+        // else mod is doing : {tmp = x+z; (cond) ? (((tmp-1)*x)-y) : (((z-1)*y)-x);}
+        float gold_i = -1.0;
+        if(cond_val_0)
+        {
+            float tmp_i = x_i + y_i;
+            gold_i = (cond_val_1) ? (((x_i - 1) * y_i) - z_i) : (((tmp_i - 1) * y_i) - z_i);
+        }
+        else
+        {
+            float tmp_i = x_i + z_i;
+            gold_i = (cond_val_1) ? (((tmp_i - 1) * x_i) - y_i) : (((z_i - 1) * y_i) - x_i);
+        }
+        auto gold = migraphx::fill_argument(ds, gold_i);
+        EXPECT(gold == result);
    }
 }
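As a quick sanity check of the gold computation, a worked example using the test's own formulas and inputs: with x = 2, y = 3, z = 4 and cond_0 = cond_1 = true, the then-branch takes its first path, so

```latex
\text{gold} = ((x - 1)\cdot y) - z = ((2 - 1)\cdot 3) - 4 = -1
```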
+// TODO : FPGA compilation is broken right now, below test mentions fpga but doesn't compile for it
 TEST_CASE(multitarget_select_module)
 {
     migraphx::program p;
-    auto* mm = p.get_main_module();
-    migraphx::shape lit_s{migraphx::shape{migraphx::shape::float_type, {1}}};
-    auto literal_ins = mm->add_literal(migraphx::literal{lit_s, {6}});
     // create batch submodules
     auto create_submodule = [&](std::size_t batch_size, const std::string& module_name) {
         auto* submod = p.create_module(module_name);
         migraphx::shape sm_shape{migraphx::shape::float_type, {batch_size, 4}};
         auto sm_input = submod->add_parameter("data", sm_shape);
+        migraphx::shape lit_s{migraphx::shape{migraphx::shape::float_type, {1}}};
+        auto literal_ins = submod->add_literal(migraphx::literal{lit_s, {6}});
         auto broadcast_lit =
             submod->add_instruction(migraphx::make_op("multibroadcast"), literal_ins, sm_input);
         auto add_ins0 = submod->add_instruction(migraphx::make_op("add"), sm_input, broadcast_lit);
         auto add_ins1 = submod->add_instruction(migraphx::make_op("add"), add_ins0, broadcast_lit);
-        submod->add_return({add_ins0, add_ins1});
+        submod->add_return({add_ins1});
         return submod;
     };
     auto* batch1 = create_submodule(1, "batch_1");
@@ -380,36 +496,45 @@ TEST_CASE(multitarget_select_module)
     auto* batch3 = create_submodule(3, "batch_3");
     auto* batch4 = create_submodule(4, "batch_4");
-    migraphx::shape s{migraphx::shape::float_type, {{1, 4}, {4, 4}}};
-    auto input = mm->add_parameter("data", s);
     auto* run_cpu_mod = p.create_module("cpu_mod");
-    auto cpu_param = run_cpu_mod->add_parameter(
-        "cpu_data", migraphx::shape{migraphx::shape::float_type, {1, 4}});
+    auto cpu_param =
+        run_cpu_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {1, 4}});
     auto run_cpu_ins = run_cpu_mod->add_instruction(
         migraphx::make_op("run_on_target", {{"target_id", 1}}), {cpu_param}, {batch1});
-    run_cpu_mod->add_return({run_cpu_ins});
+    auto run_cpu_ins_0 = run_cpu_mod->add_instruction(
+        migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_cpu_ins);
+    run_cpu_mod->add_return({run_cpu_ins_0});
     auto* run_gpu_mod = p.create_module("gpu_mod");
-    auto gpu_param = run_gpu_mod->add_parameter(
-        "gpu_data", migraphx::shape{migraphx::shape::float_type, {2, 4}});
+    auto gpu_param =
+        run_gpu_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {2, 4}});
     auto run_gpu_ins = run_gpu_mod->add_instruction(
         migraphx::make_op("run_on_target", {{"target_id", 0}}), {gpu_param}, {batch2});
-    run_gpu_mod->add_return({run_gpu_ins});
+    auto run_gpu_ins_0 = run_gpu_mod->add_instruction(
+        migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_gpu_ins);
+    run_gpu_mod->add_return({run_gpu_ins_0});
     auto* run_fpga_mod = p.create_module("fpga_mod");
-    auto fpga_param = run_fpga_mod->add_parameter(
-        "fpga_data", migraphx::shape{migraphx::shape::float_type, {3, 4}});
+    auto fpga_param =
+        run_fpga_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {3, 4}});
     auto run_fpga_ins = run_fpga_mod->add_instruction(
         migraphx::make_op("run_on_target", {{"target_id", 2}}), {fpga_param}, {batch3});
-    run_fpga_mod->add_return({run_fpga_ins});
+    auto run_fpga_ins_0 = run_fpga_mod->add_instruction(
        migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_fpga_ins);
+    run_fpga_mod->add_return({run_fpga_ins_0});
     auto* run_ref_mod = p.create_module("ref_mod");
-    auto ref_param = run_fpga_mod->add_parameter(
-        "ref_data", migraphx::shape{migraphx::shape::float_type, {4, 4}});
+    auto ref_param =
+        run_ref_mod->add_parameter("data", migraphx::shape{migraphx::shape::float_type, {4, 4}});
     auto run_ref_ins = run_ref_mod->add_instruction(
         migraphx::make_op("run_on_target", {{"target_id", 3}}), {ref_param}, {batch4});
-    run_ref_mod->add_return({run_ref_ins});
+    auto run_ref_ins_0 = run_ref_mod->add_instruction(
+        migraphx::make_op("get_tuple_elem", {{"index", 0}}), run_ref_ins);
+    run_ref_mod->add_return({run_ref_ins_0});
+    auto* mm = p.get_main_module();
+    migraphx::shape dyn_s{migraphx::shape::float_type, {{1, 4}, {4, 4}}};
+    auto input = mm->add_parameter("data", dyn_s);
     std::vector<migraphx::shape> sub_shapes = {};
     sub_shapes.push_back(migraphx::shape{migraphx::shape::float_type, {{1, 4}, {4, 4}}});
     sub_shapes.push_back(migraphx::shape{migraphx::shape::float_type, {{1, 4}, {4, 4}}});
@@ -419,18 +544,34 @@ TEST_CASE(multitarget_select_module)
                             {input},
                             {run_cpu_mod, run_gpu_mod, run_fpga_mod, run_ref_mod});
     auto ret0 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 0}}), sm_ins);
-    auto ret1 = mm->add_instruction(migraphx::make_op("get_tuple_elem", {{"index", 1}}), sm_ins);
-    mm->add_return({ret0, ret1});
+    mm->add_return({ret0});
     // compile
+    migraphx::compile_options gpu_opts;
+    gpu_opts.offload_copy = true;
     p.compile({migraphx::make_target("gpu"),
                migraphx::make_target("cpu"),
-               migraphx::make_target("fpga"),
-               migraphx::make_target("ref")});
+               migraphx::make_target("ref"),
+               migraphx::make_target("ref")},
+              {gpu_opts});
     EXPECT(check_compiled_program(p,
                                   {migraphx::make_target("gpu"),
                                    migraphx::make_target("cpu"),
-                                   migraphx::make_target("fpga"),
+                                   migraphx::make_target("ref"),
                                    migraphx::make_target("ref")}));
+    // program does the 12+x where x has dynamic shape {{1, 4}, {4, 4}}
+    for(const size_t bs : {1, 2, 3, 4})
+    {
+        migraphx::shape arg_shape{migraphx::shape::float_type, {bs, 4}};
+        migraphx::parameter_map params;
+        params["data"] = migraphx::generate_argument(arg_shape, arg_shape.elements());
+        std::vector<float> input_data;
+        params["data"].visit([&](const auto& vec) { input_data.assign(vec.begin(), vec.end()); });
+        std::transform(input_data.begin(), input_data.end(), input_data.begin(), [](const auto& i) {
+            return i + 12.0;
+        });
+        auto result = p.eval(params).back();
+        EXPECT(migraphx::argument(arg_shape, input_data.data()) == result);
+    }
 }
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
[Binary ONNX test models changed (blob 3be6eb53c8b359703cb645ed2cb1cdf106924b7c → d3295f4329d744fe1f8419e1220e123807282b99): the diff touches conv_transpose_auto_pad_test, a ConvTranspose model with auto_pad = SAME_UPPER, and renames deconv_bias_test to conv_transpose_bias_test. The remaining protobuf content is not representable as text.]