Unverified Commit 0b2bcf2c authored by Ted Themistokleous's avatar Ted Themistokleous Committed by GitHub
Browse files

Merge branch 'develop' into add_parity_check_ci

parents fbacefca dcc7b0a5
......@@ -790,22 +790,26 @@ struct find_layernorm_pointwise
{
auto matcher() const
{
return precompile_name("pointwise")(match::arg(0)(
return precompile_name("pointwise")(match::any_of[match::inputs()](
precompile_name("gpu::prelayernorm", "gpu::preadd_layernorm").bind("layernorm")));
}
void apply(module& m, const match::matcher_result& r) const
{
auto ins = r.result;
auto pw_ins = r.result;
auto layernorm = r.instructions["layernorm"];
if(not layernorm->module_inputs().empty())
return;
auto* pm = ins->module_inputs().front();
auto* pm = pw_ins->module_inputs().front();
auto pw_inputs = pw_ins->inputs();
auto ln_pos = std::find(pw_inputs.begin(), pw_inputs.end(), layernorm);
assert(ln_pos != pw_inputs.end());
pw_inputs.erase(ln_pos);
auto inputs = layernorm->inputs();
inputs.pop_back();
inputs.insert(inputs.end(), ins->inputs().begin() + 1, ins->inputs().end());
inputs.insert(inputs.end(), pw_inputs.begin(), pw_inputs.end());
m.replace_instruction(ins, layernorm->get_operator(), inputs, {pm});
m.replace_instruction(pw_ins, layernorm->get_operator(), inputs, {pm});
}
};
......
......@@ -81,7 +81,7 @@ struct roialign_compiler : compiler<roialign_compiler>
// coord_trans_mode
auto ctm = v.at("coordinate_transformation_mode").to<std::string>();
float rois_offset = (ctm == "output_half_pixel") ? -0.5f : 0.0f;
float rois_offset = (ctm == "half_pixel") ? -0.5f : 0.0f;
options.params += " -DROIS_OFFSET=" + std::to_string(rois_offset);
// spatial_scale
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -40,6 +40,7 @@
#include <migraphx/op/if_op.hpp>
#include <migraphx/op/reshape.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/op/reshape_lazy.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/lowering.hpp>
......@@ -89,7 +90,6 @@ struct miopen_apply
offload_copy = (mod == mpm->get_root_module()) ? pass->offload_copy : false;
add_generic_op("contiguous");
add_extend_op("argmax");
add_extend_op("argmin");
add_extend_op("logsoftmax");
......@@ -115,6 +115,7 @@ struct miopen_apply
add_neg_op();
add_nms_op();
add_select_module_op();
add_reshape_lazy_op();
}
void copy_params() const
......@@ -376,6 +377,32 @@ struct miopen_apply
return mod->replace_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
});
}
/**
* Adds reshape lazy to reshape ops that can be aliased instead of copied.
* `gpu::contiguous` are added before and after the reshape; these contiguous
* instructions can be removed by the eliminate_contiguous pass.
*/
void add_reshape_lazy_op()
{
apply_map.emplace("reshape", [=](instruction_ref ins) {
std::vector<instruction_ref> before_contiguous_args = ins->inputs();
auto before_alloc = insert_allocation(ins, std::prev(ins)->get_shape());
before_contiguous_args.push_back(before_alloc);
auto before_contig =
mod->insert_instruction(ins, make_op("gpu::contiguous"), {before_contiguous_args});
auto new_lazy_reshape = mod->insert_instruction(
ins,
make_op("reshape_lazy", {{"dims", {ins->get_operator().to_value().at("dims")}}}),
before_contig);
std::vector<instruction_ref> after_contiguous_args = {new_lazy_reshape};
auto after_alloc = insert_allocation(new_lazy_reshape, new_lazy_reshape->get_shape());
after_contiguous_args.push_back(after_alloc);
return mod->replace_instruction(ins, make_op("gpu::contiguous"), after_contiguous_args);
});
}
};
void lowering::apply(module_pass_manager& mpm) const
......
......@@ -24,6 +24,7 @@
#include "migraphx/make_op.hpp"
#include <migraphx/stringutils.hpp>
#include <migraphx/gpu/mlir.hpp>
#include <ostream>
#ifdef MIGRAPHX_MLIR
#include <mlir-c/IR.h>
......@@ -34,6 +35,7 @@
#include <mlir-c/Dialect/Rock.h>
#include <mlir-c/IntegerSet.h>
#include <mlir-c/Pass.h>
#include <mlir-c/Support.h>
#include <mutex>
#if !defined(MLIR_MIGRAPHX_DIALECT_API_VERSION) || MLIR_MIGRAPHX_DIALECT_API_VERSION != 3
#warning "Incompatible version of rocMLIR library used, disabling"
......@@ -180,13 +182,85 @@ std::string mlir_print(F f, T x)
return ss.str();
}
struct mlir_logger
{
std::stringstream ss;
mlir_context* ctx;
std::optional<MlirDiagnosticHandlerID> id;
mlir_logger() : ctx(nullptr), id(std::nullopt) {}
mlir_logger(mlir_context* context) : ctx(context)
{
id =
mlirContextAttachDiagnosticHandler(ctx->get(), mlir_diagnostic_print_cb, this, nullptr);
}
~mlir_logger()
{
if(id.has_value())
mlirContextDetachDiagnosticHandler(ctx->get(), *id);
}
mlir_logger(const mlir_logger& other) = delete;
mlir_logger& operator=(const mlir_logger& other) = delete;
mlir_logger(mlir_logger&& other) noexcept
: ss(std::move(other.ss)), ctx(other.ctx), id(other.id)
{
other.ctx = nullptr;
other.id = std::nullopt;
}
mlir_logger& operator=(mlir_logger other) noexcept
{
std::swap(ss, other.ss);
std::swap(ctx, other.ctx);
std::swap(id, other.id);
return *this;
}
std::string str() const { return ss.str(); }
void clear() { ss = std::stringstream{}; }
static MlirLogicalResult mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger);
MlirLogicalResult handle(MlirDiagnostic diag);
};
MlirLogicalResult mlir_logger::mlir_diagnostic_print_cb(MlirDiagnostic diag, void* logger)
{
return reinterpret_cast<mlir_logger*>(logger)->handle(diag);
}
MlirLogicalResult mlir_logger::handle(MlirDiagnostic diag)
{
MlirDiagnosticSeverity sev = mlirDiagnosticGetSeverity(diag);
switch(sev)
{
case MlirDiagnosticSeverity::MlirDiagnosticError: ss << "Error: "; break;
case MlirDiagnosticSeverity::MlirDiagnosticWarning: ss << "Warning: "; break;
case MlirDiagnosticSeverity::MlirDiagnosticNote: ss << "Note: "; break;
case MlirDiagnosticSeverity::MlirDiagnosticRemark: ss << "Remark: "; break;
}
mlir_print(mlirDiagnosticPrint, diag, [&](auto s) { ss << s; });
ss << std::endl;
for(intptr_t i = 0, e = mlirDiagnosticGetNumNotes(diag); i < e; ++i)
{
(void)handle(mlirDiagnosticGetNote(diag, i));
}
return mlirLogicalResultSuccess();
}
struct mlir_program
{
mlir_program()
: ctx(mlirContextCreateWithRegistry(get_dialect_registry().get(),
/*threadingEnable=*/false)),
location(mlirLocationUnknownGet(ctx.get())),
mmodule(mlirModuleCreateEmpty(location))
mmodule(mlirModuleCreateEmpty(location)),
logger(&ctx)
{
mlirContextSetThreadPool(ctx.get(), get_thread_pool().get());
mlirContextLoadAllAvailableDialects(ctx.get());
......@@ -614,21 +688,49 @@ struct mlir_program
}
}
void run_high_level_pipeline() MIGRAPHX_TIDY_CONST
void run_high_level_pipeline()
{
mlir_pass_manager pm_front{mlirPassManagerCreate(ctx.get())};
mlirMIGraphXAddHighLevelPipeline(pm_front.get());
mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()));
logger.clear();
if(mlirLogicalResultIsFailure(
mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()))))
{
std::string error = "Invalid MLIR created: " + logger.str();
if(enabled(MIGRAPHX_TRACE_MLIR{}))
{
std::cout << error << std::endl;
}
MIGRAPHX_THROW(error);
}
}
void run_backend_pipeline() MIGRAPHX_TIDY_CONST
void run_backend_pipeline()
{
mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get()));
logger.clear();
const size_t trace = value_of(MIGRAPHX_TRACE_MLIR{});
static std::mutex mutex;
auto mod_op = mlirModuleGetOperation(mmodule.get());
if(trace >= 2)
{
const std::lock_guard<std::mutex> lock(mutex);
std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl;
}
if(mlirLogicalResultIsFailure(mlirPassManagerRunOnOp(pm_back.get(), mod_op)))
{
std::string error = "MLIR backend compilation failed: " + logger.str();
if(enabled(MIGRAPHX_TRACE_MLIR{}))
{
std::cout << error << std::endl;
}
MIGRAPHX_THROW(error);
}
}
code_object_op compile(const value& solution) MIGRAPHX_TIDY_CONST
code_object_op compile(const value& solution)
{
// 1st pipeline to call
run_high_level_pipeline();
......@@ -682,7 +784,7 @@ struct mlir_program
MIGRAPHX_THROW("Failed setting tuning key: " + *str);
}
tuning_config get_tuning_config(bool exhaustive) MIGRAPHX_TIDY_CONST
tuning_config get_tuning_config(bool exhaustive)
{
tuning_config tc;
run_high_level_pipeline();
......@@ -702,7 +804,8 @@ struct mlir_program
if(perf_key_bytes > perf_key.size())
MIGRAPHX_THROW("Tuning perf key was " + std::to_string(perf_key_bytes) +
" bytes and thus too long");
tc.solutions.emplace_back(perf_key.begin(), perf_key.begin() + perf_key_bytes);
tc.solutions.emplace_back(
std::string(perf_key.begin(), perf_key.begin() + perf_key_bytes));
}
std::array<char, ROCMLIR_TUNING_KEY_BUFSZ> tuning_key;
size_t tuning_key_bytes =
......@@ -809,6 +912,7 @@ struct mlir_program
mlir_context ctx;
MlirLocation location;
mlir_module mmodule;
mlir_logger logger;
problem_params pp;
std::deque<std::string> strings{};
std::string target_arch = "";
......
......@@ -48,6 +48,7 @@
#include <migraphx/rewrite_quantization.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/schedule.hpp>
#include <migraphx/simplify_dyn_ops.hpp>
#include <migraphx/simplify_qdq.hpp>
#include <migraphx/simplify_reshapes.hpp>
#include <migraphx/split_single_dyn_dim.hpp>
......@@ -109,6 +110,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
{
split_single_dyn_dim{},
dead_code_elimination{},
simplify_dyn_ops{},
dead_code_elimination{},
normalize_ops{},
dead_code_elimination{},
simplify_qdq{},
......
......@@ -28,19 +28,20 @@ namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
bool verify_args(const std::string& name,
const argument& ref_arg,
const argument& target_arg,
double tolerance)
const verify::expected<argument>& ref_arg,
verify::tolerance tols)
{
bool passed = true;
visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
double error;
passed = verify::verify_range(ref, target, tolerance, &error);
visit_all(ref_arg.data(), target_arg)([&](auto ref, auto target) {
double rms_error;
passed =
verify::verify_range_with_tolerance(target, verify::expected{ref}, tols, &rms_error);
if(not passed)
{
// TODO: Check for nans
std::cout << "FAILED: " << name << std::endl;
std::cout << "error: " << error << std::endl;
std::cout << "RMS Error: " << rms_error << std::endl;
if(ref.size() < 32)
std::cout << "ref:" << ref << std::endl;
if(target.size() < 32)
......@@ -93,5 +94,16 @@ bool verify_args(const std::string& name,
return passed;
}
bool verify_args_with_tolerance(const std::string& name,
const argument& target_arg,
const verify::expected<argument>& ref_arg,
std::size_t tolerance)
{
double rms_tol = 0.001;
target_arg.visit([&](auto ta) { rms_tol = verify::get_rms_tol(ta, tolerance); });
verify::tolerance tols{rms_tol};
return verify_args(name, target_arg, ref_arg, tols);
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -25,97 +25,19 @@
cmake_policy(SET CMP0057 NEW)
find_package(Threads REQUIRED)
include(ProcessorCount)
ProcessorCount(N)
set(CTEST_PARALLEL_LEVEL ${N} CACHE STRING "CTest parallel level")
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j ${CTEST_PARALLEL_LEVEL} -C ${CMAKE_CFG_INTDIR} --timeout 5000)
add_custom_target(tests)
rocm_test_link_libraries(Threads::Threads migraphx migraphx_ref migraphx_onnx migraphx_tf)
rocm_test_include_directories(include)
set(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS Off CACHE BOOL "")
if(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS)
add_compile_definitions(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS)
endif()
find_program(MIGRAPHX_GDB gdb)
if(MIGRAPHX_GDB)
set(MIGRAPHX_TEST_GDB On CACHE BOOL "")
else()
set(MIGRAPHX_TEST_GDB Off CACHE BOOL "")
endif()
set(SKIP_TESTS)
function(add_test_command NAME EXE)
if(NAME IN_LIST SKIP_TESTS)
add_test(NAME ${NAME} COMMAND echo skipped)
set_tests_properties(${NAME} PROPERTIES DISABLED On)
elseif(WIN32)
set(WINPATH)
foreach(PATH ${CMAKE_FIND_ROOT_PATH})
list(APPEND WINPATH ${PATH}/bin)
endforeach()
file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd"
CONTENT "set PATH=${WINPATH};%PATH%
%1 ${ARGN}")
add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
else()
if(MIGRAPHX_TEST_GDB)
# add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB}
# --batch
# --return-child-result
# -ex "set disable-randomization off"
# -ex run
# -ex backtrace
# --args $<TARGET_FILE:${EXE}> ${ARGN})
set(TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/gdb/test_${NAME})
file(MAKE_DIRECTORY ${TEST_DIR})
if(NOT EXISTS ${TEST_DIR})
message(FATAL_ERROR "Failed to create test directory: ${TEST_DIR}")
endif()
file(GENERATE OUTPUT "${TEST_DIR}/run.cmake"
CONTENT "
# Remove previous core dump
file(REMOVE ${TEST_DIR}/core)
execute_process(COMMAND $<TARGET_FILE:${EXE}> ${ARGN} WORKING_DIRECTORY ${TEST_DIR} RESULT_VARIABLE RESULT)
if(NOT RESULT EQUAL 0)
# TODO: check for core files based on pid when setting /proc/sys/kernel/core_uses_pid
if(EXISTS ${TEST_DIR}/core)
set(\$ENV{UBSAN_OPTIONS} print_stacktrace=1)
set(\$ENV{ASAN_OPTIONS} print_stacktrace=1)
execute_process(COMMAND ${MIGRAPHX_GDB} $<TARGET_FILE:${EXE}> ${TEST_DIR}/core -batch -ex bt)
endif()
message(FATAL_ERROR \"Test failed\")
endif()
")
add_test(NAME ${NAME} COMMAND ${CMAKE_COMMAND} -P "${TEST_DIR}/run.cmake")
else()
add_test(NAME ${NAME} COMMAND ${EXE} ${ARGN})
endif()
endif()
set_tests_properties(${NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
endfunction()
function(add_test_executable TEST_NAME)
add_executable(${TEST_NAME} EXCLUDE_FROM_ALL ${ARGN})
set(TEST_COMMAND ${TEST_NAME})
add_test_command(${TEST_NAME} ${TEST_COMMAND})
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
target_link_libraries(${TEST_NAME} Threads::Threads migraphx migraphx_onnx migraphx_ref)
target_include_directories(${TEST_NAME} PUBLIC include)
endfunction(add_test_executable)
file(GLOB TESTS CONFIGURE_DEPENDS *.cpp)
foreach(TEST ${TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_${BASE_NAME} ${TEST})
rocm_add_test_executable(test_${BASE_NAME} ${TEST})
rocm_clang_tidy_check(test_${BASE_NAME})
endforeach()
......@@ -125,7 +47,7 @@ if(MIGRAPHX_ENABLE_GPU)
foreach(TEST ${GPU_TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_gpu_${BASE_NAME} ${TEST})
rocm_add_test_executable(test_gpu_${BASE_NAME} ${TEST})
rocm_clang_tidy_check(test_gpu_${BASE_NAME})
set_tests_properties(test_gpu_${BASE_NAME} PROPERTIES
COST 10
......@@ -144,7 +66,7 @@ if(MIGRAPHX_ENABLE_FPGA)
foreach(TEST ${FPGA_TESTS})
get_filename_component(BASE_NAME ${TEST} NAME_WE)
add_test_executable(test_fpga_${BASE_NAME} ${TEST})
rocm_add_test_executable(test_fpga_${BASE_NAME} ${TEST})
rocm_clang_tidy_check(test_fpga_${BASE_NAME})
set_tests_properties(test_fpga_${BASE_NAME} PROPERTIES
COST 10
......@@ -166,19 +88,17 @@ foreach(ONNX_TEST ${ONNX_TESTS})
target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_ref)
target_include_directories(${TEST_NAME} PUBLIC include)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_ONNX_DIR})
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
rocm_mark_as_test(${TEST_NAME})
endforeach()
# tf test
set(TEST_TF_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tf)
add_executable(test_tf tf/tf_test.cpp)
rocm_mark_as_test(test_tf)
rocm_clang_tidy_check(test_tf)
target_link_libraries(test_tf migraphx_tf)
target_include_directories(test_tf PUBLIC include)
add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${TEST_TF_DIR})
add_dependencies(tests test_tf)
add_dependencies(check test_tf)
add_subdirectory(api)
add_subdirectory(verify)
......@@ -201,8 +121,7 @@ if(MIGRAPHX_ENABLE_GPU AND MIGRAPHX_ENABLE_CPU AND MIGRAPHX_ENABLE_FPGA)
target_link_libraries(${TEST_NAME} migraphx migraphx_onnx migraphx_tf migraphx_all_targets)
target_include_directories(${TEST_NAME} PUBLIC include)
add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_MULTI_TARGET_DIR})
add_dependencies(tests ${TEST_NAME})
add_dependencies(check ${TEST_NAME})
rocm_mark_as_test(${TEST_NAME})
endforeach()
endif()
......@@ -219,7 +138,7 @@ int main() {}\n"
#endif
\n"
)
add_test_executable(${NAME}
rocm_add_test_executable(${NAME}
${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp
)
......@@ -241,13 +160,13 @@ test_headers(migraphx ${CMAKE_SOURCE_DIR}/src/include/migraphx/*.hpp)
test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/ref/*.hpp)
if(MIGRAPHX_ENABLE_GPU)
test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
test_headers(migraphx/gpu HEADERS ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp DEPENDS migraphx_gpu)
endif()
if(MIGRAPHX_ENABLE_CPU)
test_headers(migraphx/cpu ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp)
test_headers(migraphx/cpu HEADERS ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp migraphx_cpu)
endif()
if(MIGRAPHX_ENABLE_FPGA)
test_headers(migraphx/fpga ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp)
test_headers(migraphx/fpga HEADERS ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp migraphx_fpga)
endif()
......@@ -158,6 +158,31 @@ TEST_CASE(two_transpose_gather)
EXPECT(m1 == m2);
}
TEST_CASE(standard_reshape_lazy)
{
migraphx::module m1;
{
auto data = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
auto add = m1.add_instruction(migraphx::make_op("add"), data, data);
auto r =
m1.add_instruction(migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), add);
m1.add_return({r});
}
run_pass(m1);
migraphx::module m2;
{
auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
auto add = m2.add_instruction(migraphx::make_op("add"), data, data);
auto ca = m2.add_instruction(migraphx::make_op("contiguous"), add);
auto r =
m2.add_instruction(migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), ca);
m2.add_return({r});
}
EXPECT(m1 == m2);
}
TEST_CASE(standard_reshape)
{
migraphx::module m1;
......@@ -173,8 +198,7 @@ TEST_CASE(standard_reshape)
{
auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
auto add = m2.add_instruction(migraphx::make_op("add"), data, data);
auto ca = m2.add_instruction(migraphx::make_op("contiguous"), add);
auto r = m2.add_instruction(migraphx::make_op("reshape", {{"dims", {2, 1, 12, 5}}}), ca);
auto r = m2.add_instruction(migraphx::make_op("reshape", {{"dims", {2, 1, 12, 5}}}), add);
m2.add_return({r});
}
......
......@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
migraphx::target gpu_t = migraphx::make_target("gpu");
run_prog(p, gpu_t, m, gpu_result);
EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
EXPECT(migraphx::verify::verify_rms_range(gpu_result, ref_result));
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "make_precompile_op.hpp"
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/gpu/fuse_ops.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <basic_ops.hpp>
#include <migraphx/make_op.hpp>
#include <test.hpp>
#include <pointwise.hpp>
void run_pass(migraphx::program& p)
{
migraphx::run_passes(p, {migraphx::gpu::fuse_ops{}, migraphx::dead_code_elimination{}});
}
TEST_CASE(layernorm_pointwise)
{
migraphx::shape s{migraphx::shape::float_type, {2, 3, 4}};
auto create_program = [=](bool first_arg_layernorm) {
migraphx::program p;
auto* mm = p.get_main_module();
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", s);
auto z = mm->add_parameter("z", s);
auto alloc = migraphx::make_op("allocate", {{"shape", to_value(s)}});
auto alloc_ins = mm->add_instruction(alloc);
auto* pw_add1 =
create_pointwise_module(p, "main:pointwise0", {x, y}, single_pointwise("add"));
auto add1 =
mm->add_instruction(make_precompile_op("pointwise"), {x, y, alloc_ins}, {pw_add1});
auto alloc_ins2 = mm->add_instruction(alloc);
auto layernorm_ins =
mm->add_instruction(make_precompile_op("gpu::prelayernorm"), add1, alloc_ins2);
std::vector<migraphx::instruction_ref> pw_inputs = {layernorm_ins, z};
if(not first_arg_layernorm)
{
pw_inputs = {z, layernorm_ins};
}
auto* pw_add2 =
create_pointwise_module(p, "main:pointwise1", pw_inputs, single_pointwise("add"));
auto alloc_ins3 = mm->add_instruction(alloc);
pw_inputs.push_back(alloc_ins3);
auto add2 = mm->add_instruction(make_precompile_op("pointwise"), pw_inputs, {pw_add2});
mm->add_return({add2});
return p;
};
auto create_fused_program = [=]() {
migraphx::program p;
auto* mm = p.get_main_module();
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", s);
auto z = mm->add_parameter("z", s);
auto alloc = migraphx::make_op("allocate", {{"shape", to_value(s)}});
auto alloc_ins = mm->add_instruction(alloc);
auto* pw_add1 =
create_pointwise_module(p, "main:pointwise0", {x, y}, single_pointwise("add"));
auto add1 =
mm->add_instruction(make_precompile_op("pointwise"), {x, y, alloc_ins}, {pw_add1});
auto alloc_ins2 = mm->add_instruction(alloc);
auto* pw_add2 =
create_pointwise_module(p, "main:pointwise1", {x, z}, single_pointwise("add"));
auto layernorm_ins = mm->add_instruction(
make_precompile_op("gpu::prelayernorm"), {add1, z, alloc_ins2}, {pw_add2});
mm->add_return({layernorm_ins});
return p;
};
{
migraphx::program p1 = create_program(true);
run_pass(p1);
migraphx::program p2 = create_fused_program();
EXPECT(p1 == p2);
}
{
migraphx::program p1 = create_program(false);
run_pass(p1);
migraphx::program p2 = create_fused_program();
EXPECT(p1 == p2);
}
}
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -53,7 +53,6 @@ TEST_CASE(host_same_buffer_copy)
migraphx::parameter_map pp;
std::vector<float> a_vec(ss.elements(), -1);
std::vector<float> b_vec(ss.elements(), 2);
std::vector<float> c_vec(ss.elements(), 0);
pp["a"] = migraphx::argument(ss, a_vec.data());
pp["b"] = migraphx::argument(ss, b_vec.data());
std::vector<float> gpu_result;
......@@ -64,7 +63,8 @@ TEST_CASE(host_same_buffer_copy)
auto result = p.eval(pp).back();
std::vector<float> results_vector(ss.elements(), -1);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify::verify_range(c_vec, results_vector));
std::vector<float> gold_vec(ss.elements(), 0);
EXPECT(migraphx::verify::verify_rms_range(results_vector, gold_vec));
}
TEST_CASE(arguments_lifetime)
......
......@@ -133,7 +133,8 @@ bool verify_mlir(const migraphx::module& mmlir)
auto inputs = generate_params(ref);
auto mlir = create_program_from_mlir(mmlir);
return migraphx::verify_args("mlir", run_ref(ref, inputs), run_gpu(mlir, inputs));
return migraphx::verify_args_with_tolerance(
"mlir", run_gpu(mlir, inputs), migraphx::verify::expected{run_ref(ref, inputs)});
}
TEST_CASE(conv)
......
......@@ -40,7 +40,6 @@
TEST_CASE(gpu_target_copy)
{
migraphx::target gpu_t = migraphx::make_target("gpu");
migraphx::target ref_t = migraphx::make_target("ref");
migraphx::shape s{migraphx::shape::int8_type, {2, 3, 4, 5}};
auto ref_arg_orig = migraphx::generate_argument(s, 0x123456L);
......@@ -52,7 +51,7 @@ TEST_CASE(gpu_target_copy)
std::vector<int8_t> val_final;
ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });
EXPECT(migraphx::verify::verify_range(val_orig, val_final));
EXPECT(migraphx::verify::verify_rms_range(val_orig, val_final));
}
TEST_CASE(int8_quantization)
......@@ -118,9 +117,12 @@ TEST_CASE(int8_quantization)
// the regular pipeline uses the rewrite_quantization in the much
// earlier stage.
if(migraphx::gpu::mlir_enabled())
EXPECT(migraphx::verify::verify_range(ref_result, gpu_result, 1e5));
EXPECT(migraphx::verify::verify_range_with_tolerance(
gpu_result,
migraphx::verify::expected{ref_result},
migraphx::verify::tolerance{0.01}));
else
EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
EXPECT(migraphx::verify::verify_rms_range(gpu_result, ref_result));
}
}
......
......@@ -24,16 +24,16 @@
#ifndef MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
#define MIGRAPHX_GUARD_TEST_INCLUDE_POINTWISE_HPP
#include <migraphx/instruction_ref.hpp>
#include <migraphx/program.hpp>
#include <migraphx/module.hpp>
#include <migraphx/make_op.hpp>
template <class F>
migraphx::instruction_ref add_pointwise(migraphx::program& p,
migraphx::module_ref mm,
const std::string& name,
std::vector<migraphx::instruction_ref> inputs,
F f)
migraphx::module_ref create_pointwise_module(migraphx::program& p,
const std::string& name,
std::vector<migraphx::instruction_ref> inputs,
F f)
{
auto* pm = p.create_module(name);
pm->set_bypass();
......@@ -44,6 +44,17 @@ migraphx::instruction_ref add_pointwise(migraphx::program& p,
});
auto r = f(pm, params);
pm->add_return({r});
return pm;
}
template <class F>
migraphx::instruction_ref add_pointwise(migraphx::program& p,
migraphx::module_ref mm,
const std::string& name,
std::vector<migraphx::instruction_ref> inputs,
F f)
{
auto* pm = create_pointwise_module(p, name, inputs, f);
return mm->add_instruction(migraphx::make_op("pointwise"), inputs, {pm});
}
......
377f959c69e9f213cd4a8c71a5e80162a412989a
6d7bc2a097a1a08541cd0d4628831c79ab8092d5
constant_no_attributes_test:)
"Constantconstant_no_attributes_testB
\ No newline at end of file
constant_value_int_test:7
"Constant*
value_int@ constant_value_int_testB
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment