manual merge

b9d37172 · Khalique Ahmed · 1af66a1c · ea62d7aa · b9d37172 · b9d37172
Commit b9d37172 authored Oct 10, 2023 by Khalique Ahmed
20 changed files
--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -48,6 +48,7 @@
 #include <migraphx/rewrite_quantization.hpp>
 #include <migraphx/rewrite_rnn.hpp>
 #include <migraphx/schedule.hpp>
+#include <migraphx/simplify_dyn_ops.hpp>
 #include <migraphx/simplify_qdq.hpp>
 #include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/split_single_dyn_dim.hpp>
@@ -109,6 +110,8 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
    {
        split_single_dyn_dim{},
        dead_code_elimination{},
+        simplify_dyn_ops{},
+        dead_code_elimination{},
        normalize_ops{},
        dead_code_elimination{},
        simplify_qdq{},

--- a/src/targets/gpu/time_op.cpp
+++ b/src/targets/gpu/time_op.cpp
@@ -34,7 +34,7 @@ namespace gpu {
 std::vector<argument> generate_arguments(const std::vector<shape>& shapes, unsigned long seed = 0)
 {
    std::vector<argument> args;
-    std::transform(shapes.begin(), shapes.end(), std::back_inserter(args), [&](auto& s) {
+    std::transform(shapes.begin(), shapes.end(), std::back_inserter(args), [&](const auto& s) {
        return to_gpu(generate_argument(s, seed++));
    });
    return args;

--- a/src/targets/ref/lowering.cpp
+++ b/src/targets/ref/lowering.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

--- a/src/tf/tf_parser.cpp
+++ b/src/tf/tf_parser.cpp
@@ -338,7 +338,7 @@ void tf_parser::parse_node(const std::string& name)
            std::string input_name = input;
            // if input has trailing `:0` index then remove it
            auto multi_out_idx = input.find(':');
-            if(multi_out_idx != std::string::npos && input.substr(multi_out_idx + 1) == "0")
+            if(multi_out_idx != std::string::npos and input.substr(multi_out_idx + 1) == "0")
            {
                input_name = input.substr(0, multi_out_idx);
            }

--- a/src/value.cpp
+++ b/src/value.cpp
@@ -285,7 +285,7 @@ bool value::contains(const std::string& pkey) const
 }
 std::size_t value::size() const
 {
-    auto* a = if_array_impl(x);
+    const auto* a = if_array_impl(x);
    if(a == nullptr)
        return 0;
    return a->size();

--- a/src/verify_args.cpp
+++ b/src/verify_args.cpp
@@ -28,19 +28,20 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

 bool verify_args(const std::string& name,
-                 const argument& ref_arg,
                 const argument& target_arg,
-                 double tolerance)
+                 const verify::expected<argument>& ref_arg,
+                 verify::tolerance tols)
 {
    bool passed = true;
-    visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
-        double error;
-        passed = verify::verify_range(ref, target, tolerance, &error);
+    visit_all(ref_arg.data(), target_arg)([&](auto ref, auto target) {
+        double rms_error;
+        passed =
+            verify::verify_range_with_tolerance(target, verify::expected{ref}, tols, &rms_error);
        if(not passed)
        {
            // TODO: Check for nans
            std::cout << "FAILED: " << name << std::endl;
-            std::cout << "error: " << error << std::endl;
+            std::cout << "RMS Error: " << rms_error << std::endl;
            if(ref.size() < 32)
                std::cout << "ref:" << ref << std::endl;
            if(target.size() < 32)
@@ -78,16 +79,6 @@ bool verify_args(const std::string& name,
            if(verify::range_zero(target))
                std::cout << "Target data is all zeros" << std::endl;

-            // auto mxdiff = max_diff(ref, target);
-            // std::cout << "Max diff: " << mxdiff << std::endl;
-
-            // auto idx = mismatch_idx(ref, target, float_equal);
-            // if(idx < verify::range_distance(ref))
-            // {
-            //     std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
-            //               << std::endl;
-            // }
-
            auto ref_nan_idx = find_idx(ref, verify::not_finite);
            if(ref_nan_idx >= 0)
                std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
@@ -97,11 +88,22 @@ bool verify_args(const std::string& name,
            if(target_nan_idx >= 0)
                std::cout << "Non finite number found in target at " << target_nan_idx << ": "
                          << target[target_nan_idx] << std::endl;
-            // std::cout << std::endl;
+            std::cout << "MIGraphX verification passed successfully." << std::endl;
        }
    });
    return passed;
 }

+// Builds an RMS tolerance with verify::get_rms_tol(target view, tolerance), then runs verify_args.
+bool verify_args_with_tolerance(const std::string& name,
+                                const argument& target_arg,
+                                const verify::expected<argument>& ref_arg,
+                                std::size_t tolerance)
+{
+    double rms = 0.001; // stays at this value unless the visit callback below assigns it
+    target_arg.visit([&](auto view) { rms = verify::get_rms_tol(view, tolerance); });
+    return verify_args(name, target_arg, ref_arg, verify::tolerance{rms});
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -25,98 +25,19 @@
 cmake_policy(SET CMP0057 NEW)

 find_package(Threads REQUIRED)
-include(ProcessorCount)
-ProcessorCount(N)
-set(CTEST_PARALLEL_LEVEL ${N} CACHE STRING "CTest parallel level")
-add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -j ${CTEST_PARALLEL_LEVEL} -C ${CMAKE_CFG_INTDIR} --timeout 5000)
-add_custom_target(tests)
-
-find_program(MIGRAPHX_GDB gdb)
-
-if(MIGRAPHX_GDB)
-    set(MIGRAPHX_TEST_GDB On CACHE BOOL "")
-else()
-    set(MIGRAPHX_TEST_GDB Off CACHE BOOL "")
-endif()
-
-set(SKIP_TESTS)
-
-function(add_test_command NAME EXE)
-    if(NAME IN_LIST SKIP_TESTS)
-        add_test(NAME ${NAME} COMMAND echo skipped)
-        set_tests_properties(${NAME} PROPERTIES DISABLED On)
-    elseif(WIN32)
-        set(WINPATH)
-
-        foreach(PATH ${CMAKE_FIND_ROOT_PATH})
-            list(APPEND WINPATH ${PATH}/bin)
-        endforeach()
-
-        file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd"
-            CONTENT "set PATH=${WINPATH};%PATH%
-                    %1 ${ARGN}")
-        add_test(NAME ${NAME} COMMAND ${WINE_CMD} cmd /c "${CMAKE_CURRENT_BINARY_DIR}/test_${NAME}.cmd" $<TARGET_FILE:${EXE}>)
-    else()
-        if(MIGRAPHX_TEST_GDB)
-            # add_test(NAME ${NAME} COMMAND ${MIGRAPHX_GDB}
-            # --batch
-            # --return-child-result
-            # -ex "set disable-randomization off"
-            # -ex run
-            # -ex backtrace
-            # --args $<TARGET_FILE:${EXE}> ${ARGN})
-            set(TEST_DIR ${CMAKE_CURRENT_BINARY_DIR}/gdb/test_${NAME})
-            file(MAKE_DIRECTORY ${TEST_DIR})
-
-            if(NOT EXISTS ${TEST_DIR})
-                message(FATAL_ERROR "Failed to create test directory: ${TEST_DIR}")
-            endif()
-
-            file(GENERATE OUTPUT "${TEST_DIR}/run.cmake"
-                CONTENT "
-                # Remove previous core dump
-                file(REMOVE ${TEST_DIR}/core)
-                execute_process(COMMAND $<TARGET_FILE:${EXE}> ${ARGN} WORKING_DIRECTORY ${TEST_DIR} RESULT_VARIABLE RESULT)
-                if(NOT RESULT EQUAL 0)
-                    # TODO: check for core files based on pid when setting /proc/sys/kernel/core_uses_pid
-                    if(EXISTS ${TEST_DIR}/core)
-                        set(\$ENV{UBSAN_OPTIONS} print_stacktrace=1)
-                        set(\$ENV{ASAN_OPTIONS} print_stacktrace=1)
-                        execute_process(COMMAND ${MIGRAPHX_GDB} $<TARGET_FILE:${EXE}> ${TEST_DIR}/core -batch -ex bt)
-                    endif()
-                    message(FATAL_ERROR \"Test failed\")
-                endif()
-            ")
-            add_test(NAME ${NAME} COMMAND ${CMAKE_COMMAND} -P "${TEST_DIR}/run.cmake")
-        else()
-            add_test(NAME ${NAME} COMMAND ${EXE} ${ARGN})
-        endif()
-    endif()
-
-    set_tests_properties(${NAME} PROPERTIES FAIL_REGULAR_EXPRESSION "FAILED")
-endfunction()
+rocm_test_link_libraries(Threads::Threads migraphx migraphx_ref migraphx_onnx migraphx_tf)
+rocm_test_include_directories(include)

-function(add_test_executable TEST_NAME)
-    add_executable(${TEST_NAME} EXCLUDE_FROM_ALL ${ARGN})
-    target_link_libraries(${TEST_NAME} ${CMAKE_THREAD_LIBS_INIT})
-
-    # Cmake does not add flags correctly for gcc
-    if(CMAKE_CXX_COMPILER_ID MATCHES "GNU")
-        set_target_properties(${TEST_NAME} PROPERTIES COMPILE_FLAGS -pthread LINK_FLAGS -pthread)
-    endif()
-    set(TEST_COMMAND ${TEST_NAME})
-    add_test_command(${TEST_NAME} ${TEST_COMMAND})
-    add_dependencies(tests ${TEST_NAME})
-    add_dependencies(check ${TEST_NAME})
-    target_link_libraries(${TEST_NAME} migraphx migraphx_onnx migraphx_ref)
-    target_include_directories(${TEST_NAME} PUBLIC include)
-endfunction(add_test_executable)
+set(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS Off CACHE BOOL "")
+if(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS)
+    add_compile_definitions(MIGRAPHX_DISABLE_LARGE_BUFFER_TESTS)
+endif()

 file(GLOB TESTS CONFIGURE_DEPENDS *.cpp)

 foreach(TEST ${TESTS})
    get_filename_component(BASE_NAME ${TEST} NAME_WE)
-    add_test_executable(test_${BASE_NAME} ${TEST})
+    rocm_add_test_executable(test_${BASE_NAME} ${TEST})
    rocm_clang_tidy_check(test_${BASE_NAME})
 endforeach()

@@ -126,7 +47,7 @@ if(MIGRAPHX_ENABLE_GPU)

    foreach(TEST ${GPU_TESTS})
        get_filename_component(BASE_NAME ${TEST} NAME_WE)
-        add_test_executable(test_gpu_${BASE_NAME} ${TEST})
+        rocm_add_test_executable(test_gpu_${BASE_NAME} ${TEST})
        rocm_clang_tidy_check(test_gpu_${BASE_NAME})
        set_tests_properties(test_gpu_${BASE_NAME} PROPERTIES
            COST 10
@@ -145,7 +66,7 @@ if(MIGRAPHX_ENABLE_FPGA)

    foreach(TEST ${FPGA_TESTS})
        get_filename_component(BASE_NAME ${TEST} NAME_WE)
-        add_test_executable(test_fpga_${BASE_NAME} ${TEST})
+        rocm_add_test_executable(test_fpga_${BASE_NAME} ${TEST})
        rocm_clang_tidy_check(test_fpga_${BASE_NAME})
        set_tests_properties(test_fpga_${BASE_NAME} PROPERTIES
            COST 10
@@ -167,22 +88,21 @@ foreach(ONNX_TEST ${ONNX_TESTS})
    target_link_libraries(${TEST_NAME} migraphx_onnx migraphx_ref)
    target_include_directories(${TEST_NAME} PUBLIC include)
    add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_ONNX_DIR})
-    add_dependencies(tests ${TEST_NAME})
-    add_dependencies(check ${TEST_NAME})
+    rocm_mark_as_test(${TEST_NAME})
 endforeach()

 # tf test
 set(TEST_TF_DIR ${CMAKE_CURRENT_SOURCE_DIR}/tf)
 add_executable(test_tf tf/tf_test.cpp)
+rocm_mark_as_test(test_tf)
 rocm_clang_tidy_check(test_tf)
 target_link_libraries(test_tf migraphx_tf)
 target_include_directories(test_tf PUBLIC include)
 add_test(NAME test_tf COMMAND $<TARGET_FILE:test_tf> WORKING_DIRECTORY ${TEST_TF_DIR})
-add_dependencies(tests test_tf)
-add_dependencies(check test_tf)

 add_subdirectory(api)
 add_subdirectory(verify)
+add_subdirectory(ref)

 if(MIGRAPHX_ENABLE_PYTHON)
    add_subdirectory(py)
@@ -201,20 +121,24 @@ if(MIGRAPHX_ENABLE_GPU AND MIGRAPHX_ENABLE_CPU AND MIGRAPHX_ENABLE_FPGA)
        target_link_libraries(${TEST_NAME} migraphx migraphx_onnx migraphx_tf migraphx_all_targets)
        target_include_directories(${TEST_NAME} PUBLIC include)
        add_test(NAME ${TEST_NAME} COMMAND $<TARGET_FILE:${TEST_NAME}> WORKING_DIRECTORY ${TEST_MULTI_TARGET_DIR})
-        add_dependencies(tests ${TEST_NAME})
-        add_dependencies(check ${TEST_NAME})
+        rocm_mark_as_test(${TEST_NAME})
    endforeach()
 endif()


 function(test_header NAME HEADER)
-    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
-        "#include <${HEADER}>\nint main() {}\n"
+    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp "
+#include <${HEADER}>
+int main() {}\n"
    )
-    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp
-        "#include <${HEADER}>\n"
+    file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp "
+#include <${HEADER}>
+#if defined(min) || defined(max) || defined(near) || defined(far)
+#error \"Do not include windows.h in header files\"
+#endif
+\n"
    )
-    add_test_executable(${NAME}
+    rocm_add_test_executable(${NAME}
        ${CMAKE_CURRENT_BINARY_DIR}/header-main-include-${NAME}.cpp
        ${CMAKE_CURRENT_BINARY_DIR}/header-static-include-${NAME}.cpp
    )
@@ -236,13 +160,13 @@ test_headers(migraphx ${CMAKE_SOURCE_DIR}/src/include/migraphx/*.hpp)
 test_headers(migraphx/ref ${CMAKE_SOURCE_DIR}/src/targets/ref/include/migraphx/ref/*.hpp)

 if(MIGRAPHX_ENABLE_GPU)
-    test_headers(migraphx/gpu ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp)
+    test_headers(migraphx/gpu HEADERS ${CMAKE_SOURCE_DIR}/src/targets/gpu/include/migraphx/gpu/*.hpp DEPENDS migraphx_gpu)
 endif()
 if(MIGRAPHX_ENABLE_CPU)
-    test_headers(migraphx/cpu ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp)
+    test_headers(migraphx/cpu HEADERS ${CMAKE_SOURCE_DIR}/src/targets/cpu/include/migraphx/cpu/*.hpp DEPENDS migraphx_cpu)
 endif()
 if(MIGRAPHX_ENABLE_FPGA)
-    test_headers(migraphx/fpga ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp)
+    test_headers(migraphx/fpga HEADERS ${CMAKE_SOURCE_DIR}/src/targets/fpga/include/migraphx/fpga/*.hpp DEPENDS migraphx_fpga)
 endif()


--- a/test/api/test_cpu.cpp
+++ b/test/api/test_cpu.cpp
@@ -145,15 +145,15 @@ TEST_CASE(zero_parameter)

 TEST_CASE(set_scalar_parameter)
 {
-    auto p1 = migraphx::parse_onnx("add_bcast_test.onnx");
-    migraphx::shape s1(migraphx_shape_float_type, {3, 4});
+    auto p1 = migraphx::parse_onnx("implicit_add_bcast_test.onnx");
+    migraphx::shape s1(migraphx_shape_float_type, {3, 4, 1});
    auto param_shapes = p1.get_parameter_shapes();
    auto s1_orig      = param_shapes["1"];
    CHECK(bool{s1 == s1_orig});

    migraphx::onnx_options option;
    option.set_input_parameter_shape("1", {});
-    auto p2 = migraphx::parse_onnx("add_bcast_test.onnx", option);
+    auto p2 = migraphx::parse_onnx("implicit_add_bcast_test.onnx", option);
    migraphx::shape s_scalar(migraphx_shape_float_type);
    auto param_shapes_1 = p2.get_parameter_shapes();
    auto s_scalar_after = param_shapes_1["1"];

--- a/test/auto_contiguous_test.cpp
+++ b/test/auto_contiguous_test.cpp
@@ -160,6 +160,31 @@ TEST_CASE(two_transpose_gather)
    EXPECT(m1 == m2);
 }

+TEST_CASE(standard_reshape_lazy)
+{ // Expects run_pass to leave a contiguous between the add and the reshape_lazy that consumes it.
+    migraphx::module m1;
+    {
+        auto data = m1.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}}); // NOTE(review): named "2x2" but dims are {2, 3, 4, 5}
+        auto add  = m1.add_instruction(migraphx::make_op("add"), data, data);
+        auto r =
+            m1.add_instruction(migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), add);
+        m1.add_return({r});
+    }
+    run_pass(m1);
+
+    migraphx::module m2; // hand-built expectation; run_pass is not applied to it
+    {
+        auto data = m2.add_parameter("2x2", {migraphx::shape::float_type, {2, 3, 4, 5}});
+        auto add  = m2.add_instruction(migraphx::make_op("add"), data, data);
+        auto ca   = m2.add_instruction(migraphx::make_op("contiguous"), add); // the only instruction m1 lacks before the pass
+        auto r =
+            m2.add_instruction(migraphx::make_op("reshape_lazy", {{"dims", {2, 1, 12, 5}}}), ca);
+        m2.add_return({r});
+    }
+
+    EXPECT(m1 == m2);
+}
+
 TEST_CASE(standard_reshape)
 {
    migraphx::module m1;

--- a/test/check_shapes_test.cpp
+++ b/test/check_shapes_test.cpp
@@ -31,24 +31,39 @@

 using migraphx::shape;

-bool create_shapes(bool dynamic_allowed)
+void create_shapes(bool dynamic_allowed) // no result: anything thrown by check_shapes reaches the caller
 {
-    try
-    {
    shape a{shape::int64_type, {3}};
    shape b{shape::float_type, {{3, 6}, {4, 4}}};
-        auto op = migraphx::make_op("add");
-        migraphx::check_shapes{{a, b}, op, dynamic_allowed}.has(2);
-        return true;
-    }
-    catch(...)
-    {
-        return false;
-    }
+    migraphx::check_shapes{{a, b}, "", dynamic_allowed}.has(2); // passes "" where the old code passed the "add" op
 }

-TEST_CASE(allow_dynamic_shape) { EXPECT(create_shapes(true)); }
+TEST_CASE(allow_dynamic_shape)
+{ // dynamic_allowed == true: create_shapes is expected not to throw
+    EXPECT(not test::throws([] { create_shapes(true); }));
+}
+
+TEST_CASE(fail_dynamic_shape)
+{ // dynamic_allowed == false: create_shapes is expected to throw
+    EXPECT(test::throws([] { create_shapes(false); }));
+}

-TEST_CASE(fail_dynamic_shape) { EXPECT(not create_shapes(false)); }
+TEST_CASE(same_layout_fail)
+{ // a and b share dims {2, 3}, but only b gets the extra {1, 2} argument; same_layout() must throw
+    EXPECT(test::throws([] {
+        shape a{shape::float_type, {2, 3}};
+        shape b{shape::float_type, {2, 3}, {1, 2}}; // presumably {1, 2} are explicit strides - confirm against shape's constructors
+        migraphx::check_shapes{{a, b}, ""}.same_layout();
+    }));
+}
+
+TEST_CASE(same_layout_pass)
+{ // a and b are constructed from identical arguments; same_layout() must not throw
+    EXPECT(not test::throws([] {
+        shape a{shape::float_type, {2, 3}, {1, 2}};
+        shape b{shape::float_type, {2, 3}, {1, 2}};
+        migraphx::check_shapes{{a, b}, ""}.same_layout();
+    }));
+}

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/common_dims.cpp
+++ b/test/common_dims.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/common_dims.hpp>
+#include <test.hpp>
+
+using axes_map = std::vector<std::vector<std::size_t>>; // shorthand for the expected axes_map1/axes_map2 values below
+
+TEST_CASE(common_d1_less)
+{ // The first input already has the finer dims, so the expected common dims equal it.
+    auto cd = migraphx::common_dims::compute({2, 32, 40, 8}, {2, 1280, 8});
+    EXPECT(cd.dims == std::vector<std::size_t>{2, 32, 40, 8});
+    EXPECT(cd.axes_map1 == axes_map{{0}, {1}, {2}, {3}});
+    EXPECT(cd.axes_map2 == axes_map{{0}, {1, 2}, {3}}); // 1280 == 32 * 40, so its axis 1 is listed as {1, 2}
+}
+
+TEST_CASE(common1)
+{ // Neither input is finer everywhere: 2560 == 40 * 8 * 8 and 1280 == 32 * 40.
+    auto cd = migraphx::common_dims::compute({2, 32, 2560}, {2, 1280, 8, 8});
+    EXPECT(cd.dims == std::vector<std::size_t>{2, 32, 40, 8, 8});
+    EXPECT(cd.axes_map1 == axes_map{{0}, {1}, {2, 3, 4}});
+    EXPECT(cd.axes_map2 == axes_map{{0}, {1, 2}, {3}, {4}});
+}
+
+TEST_CASE(common2)
+{ // Same inputs as common1 in swapped order; the expected axes maps swap while dims stay the same.
+    auto cd = migraphx::common_dims::compute({2, 1280, 8, 8}, {2, 32, 2560});
+    EXPECT(cd.dims == std::vector<std::size_t>{2, 32, 40, 8, 8});
+    EXPECT(cd.axes_map1 == axes_map{{0}, {1, 2}, {3}, {4}});
+    EXPECT(cd.axes_map2 == axes_map{{0}, {1}, {2, 3, 4}});
+}
+
+TEST_CASE(common_error1)
+{ // Both inputs hold 210 elements (6 * 35 == 3 * 7 * 2 * 5), yet the expected dims are empty.
+    auto cd = migraphx::common_dims::compute({6, 35}, {3, 7, 2, 5});
+    EXPECT(cd.dims.empty());
+}
+
+TEST_CASE(common_error2)
+{ // The {6, 35} / {3, 7, 2, 5} pair with the arguments swapped; dims are still expected to be empty.
+    auto cd = migraphx::common_dims::compute({3, 7, 2, 5}, {6, 35});
+    EXPECT(cd.dims.empty());
+}
+
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/eliminate_contiguous_test.cpp
+++ b/test/eliminate_contiguous_test.cpp
@@ -196,15 +196,47 @@ TEST_CASE(contiguous_pointwise)
            migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
        auto yc  = mm->add_instruction(migraphx::make_op("contiguous"), yb);
        auto add = add_pointwise(p, "main:pointwise0", {x, yc}, single_pointwise("add"));
-        mm->add_instruction(pass_op{}, add);
+        auto cadd = mm->add_instruction(migraphx::make_op("contiguous"), add);
+        mm->add_instruction(pass_op{}, cadd);
    }
    auto count = std::distance(mm->begin(), mm->end());
    run_pass(*mm);
-    EXPECT(std::distance(mm->begin(), mm->end()) == (count - 1));
+    EXPECT(std::distance(mm->begin(), mm->end()) == (count - 2));
    EXPECT(std::none_of(
        mm->begin(), mm->end(), [](auto&& ins) { return ins.name() == "contiguous"; }));
 }

+TEST_CASE(contiguous_nhwc_pointwise)
+{ // Expects run_pass to drop the contiguous feeding the pointwise add and keep the one after it.
+    auto s =
+        migraphx::shape::from_permutation(migraphx::shape::float_type, {2, 3, 8, 8}, {0, 2, 3, 1});
+    migraphx::program p1;
+    {
+        auto* mm = p1.get_main_module();
+        auto x   = mm->add_parameter("x", s);
+        auto y   = mm->add_parameter("y", migraphx::shape{migraphx::shape::float_type, {3}});
+        auto yb  = mm->add_instruction(
+            migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
+        auto yc   = mm->add_instruction(migraphx::make_op("contiguous"), yb); // absent from the expected program p2
+        auto add  = add_pointwise(p1, "main:pointwise0", {x, yc}, single_pointwise("add"));
+        auto cadd = mm->add_instruction(migraphx::make_op("contiguous"), add); // also present in p2
+        mm->add_instruction(pass_op{}, cadd);
+    }
+    run_pass(*p1.get_main_module());
+    migraphx::program p2; // hand-built expectation; run_pass is not applied to it
+    {
+        auto* mm = p2.get_main_module();
+        auto x   = mm->add_parameter("x", s);
+        auto y   = mm->add_parameter("y", migraphx::shape{migraphx::shape::float_type, {3}});
+        auto yb  = mm->add_instruction(
+            migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
+        auto add  = add_pointwise(p2, "main:pointwise0", {x, yb}, single_pointwise("add")); // takes the broadcast directly
+        auto cadd = mm->add_instruction(migraphx::make_op("contiguous"), add);
+        mm->add_instruction(pass_op{}, cadd);
+    }
+    EXPECT(p1 == p2);
+}
+
 TEST_CASE(slice_contiguous)
 {
    migraphx::module m;

--- a/test/eliminate_pad_test.cpp
+++ b/test/eliminate_pad_test.cpp
@@ -27,7 +27,7 @@
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/instruction.hpp>
 #include <basic_ops.hpp>
-#include <migraphx/operators.hpp>
+#include <migraphx/op/common.hpp>
 #include <migraphx/make_op.hpp>

 #include <test.hpp>
@@ -58,9 +58,8 @@ create_conv(migraphx::instruction_ref& l_img,
    migraphx::shape s_weights{migraphx::shape::int32_type, {4, channels, 3, 3}};
    std::vector<int32_t> weights(4 * channels * 3 * 3);
    auto l_weights = m.add_literal(migraphx::literal{s_weights, weights});
-    migraphx::op::convolution op;
-    op.padding_mode = padding_mode;
-    return m.add_instruction(op, l_img, l_weights);
+    return m.add_instruction(
+        migraphx::make_op("convolution", {{"padding_mode", padding_mode}}), l_img, l_weights);
 }

 TEST_CASE(rewrite_pad)

--- a/test/float_equal.cpp
+++ b/test/float_equal.cpp
@@ -112,7 +112,10 @@ TEST_CASE_REGISTER(test_limits<double, int>);
 TEST_CASE_REGISTER(test_limits<double, migraphx::half>);
 TEST_CASE_REGISTER(test_limits<float, int>);
 TEST_CASE_REGISTER(test_limits<int, migraphx::half>);
+#ifndef _WIN32
+// On Windows, types int and long have the same min and max values.
 TEST_CASE_REGISTER(test_limits<long, int>);
+#endif
 TEST_CASE_REGISTER(test_limits<long, char>);

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/fuse_pointwise.cpp
+++ b/test/fuse_pointwise.cpp
@@ -21,8 +21,9 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#include <migraphx/dead_code_elimination.hpp>
 #include <migraphx/fuse_pointwise.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/eliminate_contiguous.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/program.hpp>
@@ -361,4 +362,154 @@ TEST_CASE(no_input)
    EXPECT(p == p2);
 }

+TEST_CASE(add_reshape_add)
+{ // add -> reshape -> add: expects both adds in one pointwise module operating on shape s3.
+    migraphx::shape s1{migraphx::shape::float_type, {3, 10, 16}};
+    migraphx::shape s2{migraphx::shape::float_type, {3, 40, 2, 2}};
+    migraphx::shape s3{migraphx::shape::float_type, {3, 10, 4, 2, 2}}; // 10 * 4 == 40 and 4 * 2 * 2 == 16
+    migraphx::program p1;
+    {
+        auto* mm  = p1.get_main_module();
+        auto x    = mm->add_parameter("x", s1);
+        auto y    = mm->add_parameter("y", s1);
+        auto z    = mm->add_parameter("z", s2);
+        auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
+        auto reshape =
+            mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), add1);
+        auto add2 = mm->add_instruction(migraphx::make_op("add"), reshape, z);
+        mm->add_return({add2});
+    }
+    run_pass(p1);
+    migraphx::program p2; // hand-built expectation; run_pass is not applied to it
+    {
+        auto* mm = p2.get_main_module();
+        auto x   = mm->add_parameter("x", s1);
+        auto y   = mm->add_parameter("y", s1);
+        auto z   = mm->add_parameter("z", s2);
+        auto x2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), x); // every input is reshaped to s3
+        auto y2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), y);
+        auto z2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), z);
+        auto fadd =
+            add_pointwise(p2, "main:pointwise0", {x2, y2, z2}, [=](auto* pm, const auto& inputs) {
+                auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
+                return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
+            });
+        auto reshape = // back to the s2 dims that the original second add produced
+            mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), fadd);
+        mm->add_return({reshape});
+    }
+    EXPECT(p1.sort() == p2.sort());
+}
+
+TEST_CASE(add_reshape_add_nonstandard)
+{ // x and y use a permuted layout and p1 has a contiguous before the reshape; fusion is still expected.
+    migraphx::shape s1 =
+        migraphx::shape::from_permutation(migraphx::shape::float_type, {3, 10, 16}, {2, 0, 1});
+    migraphx::shape s2{migraphx::shape::float_type, {3, 40, 2, 2}};
+    migraphx::shape s3{migraphx::shape::float_type, {3, 10, 4, 2, 2}}; // 10 * 4 == 40 and 4 * 2 * 2 == 16
+    migraphx::program p1;
+    {
+        auto* mm     = p1.get_main_module();
+        auto x       = mm->add_parameter("x", s1);
+        auto y       = mm->add_parameter("y", s1);
+        auto z       = mm->add_parameter("z", s2);
+        auto add1    = mm->add_instruction(migraphx::make_op("add"), x, y);
+        auto c       = mm->add_instruction(migraphx::make_op("contiguous"), add1);
+        auto reshape = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), c);
+        auto add2    = mm->add_instruction(migraphx::make_op("add"), reshape, z);
+        mm->add_return({add2});
+    }
+    run_pass(p1);
+    migraphx::program p2; // hand-built expectation; run_pass is not applied to it
+    {
+        auto* mm = p2.get_main_module();
+        auto x   = mm->add_parameter("x", s1);
+        auto y   = mm->add_parameter("y", s1);
+        auto z   = mm->add_parameter("z", s2);
+        auto cx  = mm->add_instruction(migraphx::make_op("contiguous"), x); // contiguous is expected on each permuted input
+        auto cy  = mm->add_instruction(migraphx::make_op("contiguous"), y);
+        auto x2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), cx);
+        auto y2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), cy);
+        auto z2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), z);
+        auto fadd =
+            add_pointwise(p2, "main:pointwise0", {x2, y2, z2}, [=](auto* pm, const auto& inputs) {
+                auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
+                return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
+            });
+        auto reshape = // back to the s2 dims that the original second add produced
+            mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), fadd);
+        mm->add_return({reshape});
+    }
+    EXPECT(p1.sort() == p2.sort());
+}
+
+TEST_CASE(add_unsqueeze_add_nonstandard)
+{ // add -> unsqueeze -> add on permuted inputs: expects one fused pointwise returned directly.
+    migraphx::shape s1 =
+        migraphx::shape::from_permutation(migraphx::shape::float_type, {3, 10, 16}, {2, 0, 1});
+    migraphx::shape s2{migraphx::shape::float_type, {3, 10, 1, 16}}; // s1's dims with a 1 inserted at axis 2
+    migraphx::program p1;
+    {
+        auto* mm       = p1.get_main_module();
+        auto x         = mm->add_parameter("x", s1);
+        auto y         = mm->add_parameter("y", s1);
+        auto z         = mm->add_parameter("z", s2);
+        auto add1      = mm->add_instruction(migraphx::make_op("add"), x, y);
+        auto unsqueeze = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {2}}}), add1);
+        auto add2      = mm->add_instruction(migraphx::make_op("add"), unsqueeze, z);
+        mm->add_return({add2});
+    }
+    run_pass(p1);
+    migraphx::program p2; // hand-built expectation; run_pass is not applied to it
+    {
+        auto* mm = p2.get_main_module();
+        auto x   = mm->add_parameter("x", s1);
+        auto y   = mm->add_parameter("y", s1);
+        auto z   = mm->add_parameter("z", s2);
+        auto cx  = mm->add_instruction(migraphx::make_op("contiguous"), x);
+        auto cy  = mm->add_instruction(migraphx::make_op("contiguous"), y);
+        auto x2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), cx); // expectation uses reshape, not unsqueeze
+        auto y2  = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), cy);
+        auto fadd =
+            add_pointwise(p2, "main:pointwise0", {x2, y2, z}, [=](auto* pm, const auto& inputs) { // z is passed through as-is
+                auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
+                return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
+            });
+        mm->add_return({fadd}); // no trailing reshape in the expectation
+    }
+    EXPECT(p1.sort() == p2.sort());
+}
+
+TEST_CASE(add_reshape_add_error)
+{ // {6, 35} -> {3, 7, 2, 5}: expects the adds to stay in two pointwise modules around the reshape.
+    migraphx::shape s1{migraphx::shape::float_type, {6, 35}};
+    migraphx::shape s2{migraphx::shape::float_type, {3, 7, 2, 5}};
+    migraphx::program p1;
+    {
+        auto* mm  = p1.get_main_module();
+        auto x    = mm->add_parameter("x", s1);
+        auto y    = mm->add_parameter("y", s1);
+        auto z    = mm->add_parameter("z", s2);
+        auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
+        auto reshape =
+            mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), add1);
+        auto add2 = mm->add_instruction(migraphx::make_op("add"), reshape, z);
+        mm->add_return({add2});
+    }
+    run_pass(p1);
+    migraphx::program p2; // hand-built expectation; run_pass is not applied to it
+    {
+        auto* mm   = p2.get_main_module();
+        auto x     = mm->add_parameter("x", s1);
+        auto y     = mm->add_parameter("y", s1);
+        auto z     = mm->add_parameter("z", s2);
+        auto fadd1 = add_pointwise(p2, "main:pointwise0", {x, y}, single_pointwise("add")); // first add alone
+        auto reshape =
+            mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), fadd1);
+        auto fadd2 = add_pointwise(p2, "main:pointwise1", {reshape, z}, single_pointwise("add")); // second add alone
+        mm->add_return({fadd2});
+    }
+    EXPECT(p1.sort() == p2.sort());
+}
+
 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/gpu/codegen_literal.cpp
+++ b/test/gpu/codegen_literal.cpp
@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
    migraphx::target gpu_t = migraphx::make_target("gpu");
    run_prog(p, gpu_t, m, gpu_result);

-    EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
+    EXPECT(migraphx::verify::verify_rms_range(gpu_result, ref_result));
 }

 int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/gpu/fuse_ops.cpp
+++ b/test/gpu/fuse_ops.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "make_precompile_op.hpp"
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/gpu/fuse_ops.hpp>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/program.hpp>
+#include <basic_ops.hpp>
+#include <migraphx/make_op.hpp>
+#include <test.hpp>
+#include <pointwise.hpp>
+
+void run_pass(migraphx::program& p) // Applies gpu::fuse_ops and then dead_code_elimination to p in place
+{
+    migraphx::run_passes(p, {migraphx::gpu::fuse_ops{}, migraphx::dead_code_elimination{}});
+}
+
+TEST_CASE(layernorm_pointwise) // pointwise add following gpu::prelayernorm is expected to fold into the layernorm op
+{
+    migraphx::shape s{migraphx::shape::float_type, {2, 3, 4}};
+    auto create_program = [=](bool first_arg_layernorm) { // builds the unfused input program
+        migraphx::program p;
+        auto* mm       = p.get_main_module();
+        auto x         = mm->add_parameter("x", s);
+        auto y         = mm->add_parameter("y", s);
+        auto z         = mm->add_parameter("z", s);
+        auto alloc     = migraphx::make_op("allocate", {{"shape", to_value(s)}});
+        auto alloc_ins = mm->add_instruction(alloc);
+        auto* pw_add1 =
+            create_pointwise_module(p, "main:pointwise0", {x, y}, single_pointwise("add"));
+        auto add1 =
+            mm->add_instruction(make_precompile_op("pointwise"), {x, y, alloc_ins}, {pw_add1});
+        auto alloc_ins2 = mm->add_instruction(alloc);
+        auto layernorm_ins =
+            mm->add_instruction(make_precompile_op("gpu::prelayernorm"), add1, alloc_ins2);
+        std::vector<migraphx::instruction_ref> pw_inputs = {layernorm_ins, z};
+        if(not first_arg_layernorm) // swap operands so the layernorm result is the second input
+        {
+            pw_inputs = {z, layernorm_ins};
+        }
+        auto* pw_add2 =
+            create_pointwise_module(p, "main:pointwise1", pw_inputs, single_pointwise("add"));
+        auto alloc_ins3 = mm->add_instruction(alloc);
+        pw_inputs.push_back(alloc_ins3); // allocate instruction goes last (presumably the output buffer)
+        auto add2 = mm->add_instruction(make_precompile_op("pointwise"), pw_inputs, {pw_add2});
+        mm->add_return({add2});
+        return p;
+    };
+
+    auto create_fused_program = [=]() { // builds the expected program after run_pass
+        migraphx::program p;
+        auto* mm       = p.get_main_module();
+        auto x         = mm->add_parameter("x", s);
+        auto y         = mm->add_parameter("y", s);
+        auto z         = mm->add_parameter("z", s);
+        auto alloc     = migraphx::make_op("allocate", {{"shape", to_value(s)}});
+        auto alloc_ins = mm->add_instruction(alloc);
+        auto* pw_add1 =
+            create_pointwise_module(p, "main:pointwise0", {x, y}, single_pointwise("add"));
+        auto add1 =
+            mm->add_instruction(make_precompile_op("pointwise"), {x, y, alloc_ins}, {pw_add1});
+        auto alloc_ins2 = mm->add_instruction(alloc);
+        auto* pw_add2 = // NOTE(review): module is built from {x, z} but the op below takes add1 and z - confirm intent
+            create_pointwise_module(p, "main:pointwise1", {x, z}, single_pointwise("add"));
+        auto layernorm_ins = mm->add_instruction( // prelayernorm now carries z and the add module itself
+            make_precompile_op("gpu::prelayernorm"), {add1, z, alloc_ins2}, {pw_add2});
+        mm->add_return({layernorm_ins});
+        return p;
+    };
+
+    { // layernorm result as the first pointwise input
+        migraphx::program p1 = create_program(true);
+        run_pass(p1);
+        migraphx::program p2 = create_fused_program();
+        EXPECT(p1 == p2);
+    }
+    { // layernorm result as the second pointwise input; compared against the same expected program
+        migraphx::program p1 = create_program(false);
+        run_pass(p1);
+        migraphx::program p2 = create_fused_program();
+        EXPECT(p1 == p2);
+    }
+}
+
+int main(int argc, const char* argv[]) { test::run(argc, argv); } // forwards the command-line arguments to test::run
--- a/test/gpu/jit.cpp
+++ b/test/gpu/jit.cpp
@@ -218,6 +218,15 @@ TEST_CASE(compile_warnings)
 #endif
 }

+TEST_CASE(has_flags) // checks which flag lists hip_has_flags accepts and which it rejects
+{
+    EXPECT(migraphx::gpu::hip_has_flags({"--std=c++17"})); // expected to be accepted
+    EXPECT(not migraphx::gpu::hip_has_flags({"--non-existent-flag-to-test-in-migraphx"})); // made-up flag: rejected
+    EXPECT(migraphx::gpu::hip_has_flags({"-Wunused-parameter"})); // expected to be accepted
+    EXPECT(not migraphx::gpu::hip_has_flags( // made-up warning flag passed together with -Werror: rejected
+        {"-Wnon-existent-warnings-flag-to-test-in-migraphx", "-Werror"}));
+}
+
 TEST_CASE(code_object_hip)
 {
    auto binaries = migraphx::gpu::compile_hip_src(

--- a/test/gpu/manage_host_buffer.cpp
+++ b/test/gpu/manage_host_buffer.cpp
@@ -53,7 +53,6 @@ TEST_CASE(host_same_buffer_copy)
    migraphx::parameter_map pp;
    std::vector<float> a_vec(ss.elements(), -1);
    std::vector<float> b_vec(ss.elements(), 2);
-    std::vector<float> c_vec(ss.elements(), 0);
    pp["a"] = migraphx::argument(ss, a_vec.data());
    pp["b"] = migraphx::argument(ss, b_vec.data());
    std::vector<float> gpu_result;
@@ -64,7 +63,8 @@ TEST_CASE(host_same_buffer_copy)
    auto result = p.eval(pp).back();
    std::vector<float> results_vector(ss.elements(), -1);
    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
-    EXPECT(migraphx::verify::verify_range(c_vec, results_vector));
+    std::vector<float> gold_vec(ss.elements(), 0);
+    EXPECT(migraphx::verify::verify_rms_range(results_vector, gold_vec));
 }

 TEST_CASE(arguments_lifetime)

--- a/test/gpu/mlir.cpp
+++ b/test/gpu/mlir.cpp
@@ -133,14 +133,15 @@ bool verify_mlir(const migraphx::module& mmlir)
    auto inputs = generate_params(ref);

    auto mlir = create_program_from_mlir(mmlir);
-    return migraphx::verify_args("mlir", run_ref(ref, inputs), run_gpu(mlir, inputs));
+    return migraphx::verify_args_with_tolerance(
+        "mlir", run_gpu(mlir, inputs), migraphx::verify::expected{run_ref(ref, inputs)});
 }

 TEST_CASE(conv)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @mlir_convolution(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
+  func.func @mlir_convolution(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr", num_cu = 0 : i64} {
    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
    return %0 : tensor<1x2x2x2xf32>
  }
@@ -163,7 +164,7 @@ TEST_CASE(conv_add_relu)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @mlir_convolution(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
+  func.func @mlir_convolution_add_relu(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr", num_cu = 0 : i64} {
    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
    %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
    %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
@@ -191,7 +192,7 @@ TEST_CASE(quant_dot_add)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<1x5x4xi8>, %arg1: tensor<1x4x3xi8>, %arg2: tensor<1x5x3xi32>) -> tensor<1x5x3xi32> attributes {arch = "", kernel = "mixr"} {
+  func.func @mlir_quant_dot_add(%arg0: tensor<1x5x4xi8>, %arg1: tensor<1x4x3xi8>, %arg2: tensor<1x5x3xi32>) -> tensor<1x5x3xi32> attributes {arch = "", kernel = "mixr", num_cu = 0 : i64} {
    %0 = migraphx.quant_dot(%arg0, %arg1) : (tensor<1x5x4xi8>, tensor<1x4x3xi8>) -> tensor<1x5x3xi32>
    %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xi32>, tensor<1x5x3xi32>) -> tensor<1x5x3xi32>
    return %1 : tensor<1x5x3xi32>
@@ -218,7 +219,7 @@ TEST_CASE(dot_add)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
+  func.func @mlir_dot_add(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr", num_cu = 0 : i64} {
    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
    %1 = migraphx.add(%0, %arg2) : (tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
    return %1 : tensor<1x5x3xf32>
@@ -244,7 +245,7 @@ TEST_CASE(conv_int8_dequantize_quantize)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<2x8x3x3xi8>, %arg1: tensor<1x8x4x4xi8>, %arg2: tensor<1x2x2x2xf32>, %arg3: tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32> attributes {arch = "", kernel = "mixr"} {
+  func.func @mlir_quant_convolution_dequantizelinear_quantizelinear(%arg0: tensor<2x8x3x3xi8>, %arg1: tensor<1x8x4x4xi8>, %arg2: tensor<1x2x2x2xf32>, %arg3: tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32> attributes {arch = "", kernel = "mixr", num_cu = 0 : i64} {
      %0 = migraphx.quant_convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xi8>, tensor<2x8x3x3xi8>) -> tensor<1x2x2x2xi32>
      %1 = migraphx.dequantizelinear(%0, %arg2, %arg3) : (tensor<1x2x2x2xi32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xf32>
      %2 = migraphx.quantizelinear(%1, %arg2, %arg3) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>, tensor<1x2x2x2xi32>) -> tensor<1x2x2x2xi32>
@@ -277,7 +278,7 @@ TEST_CASE(dot_convert)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>) -> tensor<1x5x3xf16> attributes {arch = "", kernel = "mixr"} {
+  func.func @mlir_dot_convert(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>) -> tensor<1x5x3xf16> attributes {arch = "", kernel = "mixr", num_cu = 0 : i64} {
    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
    %1 = migraphx.convert(%0) {target_type  =  1  :  i64} : (tensor<1x5x3xf32>) -> tensor<1x5x3xf16>
    return %1 : tensor<1x5x3xf16>
@@ -303,7 +304,7 @@ TEST_CASE(dot_where)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @mlir_dot(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xi8>, %arg3: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr"} {
+  func.func @mlir_dot_where(%arg0: tensor<1x5x4xf32>, %arg1: tensor<1x4x3xf32>, %arg2: tensor<1x5x3xi8>, %arg3: tensor<1x5x3xf32>) -> tensor<1x5x3xf32> attributes {arch = "", kernel = "mixr", num_cu = 0 : i64} {
    %0 = migraphx.dot(%arg0, %arg1) : (tensor<1x5x4xf32>, tensor<1x4x3xf32>) -> tensor<1x5x3xf32>
    %1 = migraphx.where(%arg2, %0, %arg3) : (tensor<1x5x3xi8>, tensor<1x5x3xf32>, tensor<1x5x3xf32>) -> tensor<1x5x3xf32>
    return %1 : tensor<1x5x3xf32>