Unverified Commit 0c98c38e authored by Ted Themistokleous, committed by GitHub

Merge branch 'develop' into enable_navi_32_ci

parents 1612d8f3 64b306ab
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -52,6 +52,11 @@ void calculate_padding(int64_t idx,
}
}
/**
 * Given the input array dimensions, kernel lengths (wei_lens), strides, and
 * dilations, calculate the padding value for each dimension.
 */
std::vector<std::size_t> calc_dyn_auto_pad(const std::vector<std::size_t>& input_lens,
const std::vector<std::size_t>& wei_lens,
const std::vector<std::size_t>& strides,
......@@ -60,6 +65,7 @@ std::vector<std::size_t> calc_dyn_auto_pad(const std::vector<std::size_t>& input
{
std::vector<std::size_t> padding;
assert(input_lens.size() >= 3);
assert(input_lens.size() == wei_lens.size());
std::size_t num_spatial_dims = input_lens.size() - 2;
padding.resize(2 * num_spatial_dims);
for(std::size_t i = 0; i < num_spatial_dims; i++)
......@@ -88,6 +94,11 @@ std::vector<std::size_t> calc_dyn_auto_pad(const std::vector<std::size_t>& input
return padding;
}
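The loop body of `calc_dyn_auto_pad` is elided in the hunk above. As a hedged standalone sketch, the conventional per-dimension "same" auto-padding formula (ONNX SAME_UPPER semantics) looks like the following; `same_padding` and its test values are illustrative and assume the elided code follows this formula:

```cpp
#include <algorithm>
#include <cstddef>
#include <utility>

// Sketch of per-dimension "same" auto-padding (ONNX SAME_UPPER style).
// Returns {padding_begin, padding_end} for one spatial dimension.
std::pair<std::size_t, std::size_t>
same_padding(std::size_t in, std::size_t k, std::size_t stride, std::size_t dilation)
{
    std::size_t eff_k = 1 + dilation * (k - 1);     // dilated kernel extent
    std::size_t out   = (in + stride - 1) / stride; // ceil(in / stride)
    std::ptrdiff_t total =
        std::max<std::ptrdiff_t>(0,
                                 static_cast<std::ptrdiff_t>((out - 1) * stride + eff_k) -
                                     static_cast<std::ptrdiff_t>(in));
    // SAME_UPPER places the extra element of odd padding at the end
    return {static_cast<std::size_t>(total / 2),
            static_cast<std::size_t>(total - total / 2)};
}
// same_padding(5, 3, 1, 1) == {1, 1}; same_padding(4, 3, 2, 1) == {0, 1}
```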
/**
 * Calculate the correct output shape for a convolution with
 * a given input size and other parameters.
 */
shape compute_padded_shape(const shape& input,
const shape& weights,
const std::vector<std::size_t>& padding,
......@@ -111,5 +122,33 @@ shape compute_padded_shape(const shape& input,
return input.with_lens(output_lens);
}
/**
 * Calculate the correct output shape for a pooling operation with
 * a given input size and other parameters. This uses the same formula
 * that compute_padded_shape() uses for convolutions, but takes
 * slightly different inputs.
 */
shape compute_padded_pool_shape(const shape& input,
const shape& kernel,
const std::vector<std::size_t>& padding,
const std::vector<std::size_t>& stride,
const std::vector<std::size_t>& dilation)
{
const size_t num_spatial_dims = input.lens().size() - 2;
std::vector<size_t> output_lens{input.lens()[0], input.lens()[1]};
// calculate the output shape of the pooling: ((W - K_eff + 2P) / S) + 1,
// where K_eff = 1 + dilation * (K - 1) is the dilated kernel extent
for(size_t i = 0; i < num_spatial_dims; ++i)
{
auto padding_factor = padding[i] + padding[i + num_spatial_dims];
output_lens.push_back(std::size_t(std::max<std::ptrdiff_t>(
1,
(input.lens()[i + 2] - (1 + dilation[i] * (kernel.lens()[i] - 1)) + padding_factor) /
stride[i] +
1)));
}
return input.with_lens(output_lens);
}
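As a quick sanity check of the formula above, here is a standalone restatement of the per-dimension computation (`pooled_len` is an illustrative helper, not part of the patch):

```cpp
#include <algorithm>
#include <cstddef>

// Per-dimension pooling output length, restating the loop body above.
// Assumes w + pad_total >= dilated kernel extent (same caveat as the original).
std::size_t pooled_len(std::size_t w, std::size_t k, std::size_t pad_total,
                       std::size_t stride, std::size_t dilation)
{
    std::size_t eff_k = 1 + dilation * (k - 1); // dilated kernel extent
    return std::size_t(std::max<std::ptrdiff_t>(
        1, static_cast<std::ptrdiff_t>(w - eff_k + pad_total) / stride + 1));
}
// pooled_len(8, 3, 2, 2, 1) == (8 - 3 + 2) / 2 + 1 == 4
```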
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
......@@ -50,13 +50,14 @@ struct shape_impl
{
assert(t != shape::tuple_type);
}
shape_impl(shape::type_t t, std::vector<std::size_t> l)
: m_type(t), m_lens(std::move(l)), m_standard(true)
{
assert(t != shape::tuple_type);
this->calculate_strides();
assert(m_lens.size() == m_strides.size());
}
shape_impl(shape::type_t t, std::vector<std::size_t> l, std::vector<std::size_t> s)
: m_type(t), m_lens(std::move(l)), m_strides(std::move(s))
{
......@@ -151,6 +152,22 @@ struct shape_impl
m_lens.begin(), m_lens.end(), std::size_t{1}, std::multiplies<std::size_t>());
}
std::size_t get_index(size_t i) const
{
std::size_t result = 0;
std::size_t s = 1;
for(auto k : migraphx::reverse(migraphx::range(m_lens.size())))
{
std::size_t stride = m_strides[k];
std::size_t len = m_lens[k];
std::size_t idx = (i % (s * len)) / s;
result += stride * idx;
s *= len;
}
return result;
}
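The new `get_index` converts a linear element index (row-major order over `m_lens`) into a memory offset via the strides. A self-contained version, with an illustrative transposed shape:

```cpp
#include <cstddef>
#include <vector>

// Standalone restatement of shape_impl::get_index: walk dimensions from
// last to first, extract the multi-index along each one, and dot it with
// the strides.
std::size_t linear_to_offset(const std::vector<std::size_t>& lens,
                             const std::vector<std::size_t>& strides,
                             std::size_t i)
{
    std::size_t result = 0;
    std::size_t s      = 1;
    for(std::size_t j = lens.size(); j > 0; j--)
    {
        const std::size_t k   = j - 1;
        const std::size_t idx = (i % (s * lens[k])) / s; // index along dim k
        result += strides[k] * idx;
        s *= lens[k];
    }
    return result;
}
// For a transposed 2x3 shape (lens {2, 3}, strides {1, 2}):
// linear index 1 is element (0, 1), stored at offset 1 * 2 == 2.
```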
std::vector<std::size_t> min_lens() const
{
std::vector<std::size_t> ret(m_dyn_dims.size());
......@@ -213,6 +230,7 @@ std::string shape::name(shape::type_t t)
}
MIGRAPHX_THROW("Invalid type");
}
std::string shape::cpp_type(shape::type_t t)
{
switch(t)
......@@ -229,10 +247,12 @@ std::string shape::cpp_type(shape::type_t t)
shape::shape() : impl(shape_impl::default_shape()) {}
shape::shape(type_t t) : impl(std::make_shared<shape_impl>(t)) {}
shape::shape(type_t t, std::vector<std::size_t> l)
: impl(std::make_shared<shape_impl>(t, std::move(l)))
{
}
shape::shape(type_t t, std::vector<std::size_t> l, std::vector<std::size_t> s)
: impl(std::make_shared<shape_impl>(t, std::move(l), std::move(s)))
{
......@@ -358,21 +378,8 @@ std::size_t shape::index(std::size_t i) const
assert(this->lens().size() == this->strides().size());
if(this->standard())
return i;
else
{
std::size_t s = 1;
std::size_t result = 0;
for(std::size_t j = 0; j < this->lens().size(); j++)
{
const std::size_t k = this->lens().size() - j - 1;
const std::size_t stride = this->strides()[k];
const std::size_t len = this->lens()[k];
const std::size_t idx = (i % (s * len)) / s;
result += stride * idx;
s *= len;
}
return result;
}
return impl->get_index(i);
}
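This replaces the open-coded stride walk in `shape::index` with a call to the new `shape_impl::get_index` shown earlier, so the index math now lives in one place.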
std::vector<std::size_t> shape::multi(std::size_t idx) const
......
......@@ -1446,10 +1446,13 @@ struct find_split_transpose
{
return;
}
if(std::any_of(split_outputs.begin(), split_outputs.end(), [](auto i) {
return i->outputs().size() != 1;
}))
return;
std::vector<instruction_ref> vec_trans(split_outputs.size());
std::transform(split_outputs.begin(), split_outputs.end(), vec_trans.begin(), [](auto i) {
assert(i->outputs().size() == 1);
return i->outputs().front();
});
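The matcher now bails out early when any split output has more than one consumer, instead of asserting inside the transform; such graphs are simply left untouched.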
......
......@@ -784,7 +784,7 @@ struct find_transpose_slice
void simplify_reshapes::apply(module& m) const
{
for(int i = 0; i < 4; i++)
for(int i = 0; i < depth; i++)
{
match::find_matches(m,
find_where_op{},
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......
......@@ -123,6 +123,7 @@ add_library(migraphx_gpu
lrn.cpp
mlir.cpp
multinomial.cpp
no_device.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
......
......@@ -119,6 +119,33 @@ struct mlir_op
MIGRAPHX_REGISTER_OP(mlir_op);
namespace {
std::tuple<instruction_ref, std::vector<instruction_ref>>
fuse_input_ops_and_gemm_based_op(module_ref mm, instruction_ref gemm_based_op)
{
std::vector<instruction_ref> top_inputs;
std::vector<instruction_ref> imm_inputs;
size_t input_cnt = 0;
for(instruction_ref input : gemm_based_op->inputs())
{
std::vector<operation> op_stream;
while(contains({"slice", "transpose", "contiguous", "reshape"}, input->name()))
{
op_stream.push_back(input->get_operator());
input = input->inputs().at(0);
}
top_inputs.push_back(input);
instruction_ref prev_input =
mm->add_parameter("y" + std::to_string(input_cnt++), input->get_shape());
for(const auto& op : reverse(op_stream))
{
prev_input = mm->add_instruction(op, {prev_input});
}
imm_inputs.push_back(prev_input);
}
instruction_ref new_gemm_based_op =
mm->add_instruction(gemm_based_op->get_operator(), imm_inputs);
return {new_gemm_based_op, top_inputs};
}
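This helper peels view-like producers (`slice`, `transpose`, `contiguous`, `reshape`) off each input of the GEMM- or convolution-based op, hoists the underlying tensors out as parameters named `y0`, `y1`, ..., and replays the peeled ops inside the new module in their original order (hence the `reverse`). It was moved out of the matcher struct (see the removal below) so the new standalone-convolution matcher can reuse it.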
MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
{
......@@ -134,7 +161,7 @@ MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
return true;
}
struct find_mlir_op
struct find_mlir_fused_ops
{
auto matcher() const
{
......@@ -163,34 +190,6 @@ struct find_mlir_op
return ins_map;
}
std::tuple<instruction_ref, std::vector<instruction_ref>>
fuse_input_ops_and_gemm_based_op(module_ref mm, instruction_ref gemm_based_op) const
{
std::vector<instruction_ref> top_inputs;
std::vector<instruction_ref> imm_inputs;
size_t input_cnt = 0;
for(instruction_ref input : gemm_based_op->inputs())
{
std::vector<operation> op_stream;
while(contains({"slice", "transpose", "contiguous", "reshape"}, input->name()))
{
op_stream.push_back(input->get_operator());
input = input->inputs().at(0);
}
top_inputs.push_back(input);
instruction_ref prev_input =
mm->add_parameter("y" + std::to_string(input_cnt++), input->get_shape());
for(const auto& op : reverse(op_stream))
{
prev_input = mm->add_instruction(op, {prev_input});
}
imm_inputs.push_back(prev_input);
}
instruction_ref new_gemm_based_op =
mm->add_instruction(gemm_based_op->get_operator(), imm_inputs);
return {new_gemm_based_op, top_inputs};
}
// Whitelist supported fusion options, including imposing type constraints
// for cases where MLIR only supports an operation (usually a pointwise function)
// on particular types.
......@@ -301,14 +300,95 @@ struct find_mlir_op
}
};
struct find_mlir_standalone_convolution_op
{
auto matcher() const { return match::name("convolution"); }
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto conv_based_op = r.result;
// enable only for fp32/fp16/i8 types
if(std::any_of(conv_based_op->inputs().begin(), conv_based_op->inputs().end(), [&](auto i) {
return not contains(
{shape::type_t::float_type, shape::type_t::half_type, shape::type_t::int8_type},
i->get_shape().type());
}))
return;
static size_t counter = 0;
module_ref mm = mpm.create_module("mlir_" + std::to_string(counter++));
mm->set_bypass();
auto [anchor_op, top_inputs] = fuse_input_ops_and_gemm_based_op(mm, conv_based_op);
mm->add_return({anchor_op});
mpm.get_module().replace_instruction(
conv_based_op, mlir_op{conv_based_op->get_operator()}, top_inputs, {mm});
}
};
/**
 * @brief Declares a new MIGraphX environment variable that restricts MLIR
 * code generation to specific operations.
 *
 * If defined, the variable forces MIGraphX to use only the listed operations
 * with MLIR, regardless of the underlying GPU architecture. It accepts a
 * comma-separated list of operations and recognizes the following values:
 * "fused", "convolution". If the variable is not defined, MIGraphX decides
 * on its own which operations to delegate to MLIR. The variable is intended
 * primarily for rocMLIR developers.
 */
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_USE_SPECIFIC_OPS);
bool is_self_decide() { return string_value_of(MIGRAPHX_MLIR_USE_SPECIFIC_OPS{}, "").empty(); }
bool is_requested(std::string_view option)
{
assert(not is_self_decide());
auto string_value = string_value_of(MIGRAPHX_MLIR_USE_SPECIFIC_OPS{}, "");
const auto options = split_string(string_value, ',');
return contains(options, option);
}
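For example, setting `MIGRAPHX_MLIR_USE_SPECIFIC_OPS=fused,convolution` enables both paths regardless of architecture. A minimal standalone sketch of the option lookup (`option_requested` is illustrative; the real code uses `string_value_of`, `split_string`, and `contains`):

```cpp
#include <algorithm>
#include <sstream>
#include <string>
#include <vector>

// Illustrative re-implementation of is_requested() for a given env value.
bool option_requested(const std::string& env_value, const std::string& option)
{
    std::vector<std::string> options;
    std::istringstream ss(env_value);
    std::string token;
    while(std::getline(ss, token, ',')) // comma-separated list
        options.push_back(token);
    return std::find(options.begin(), options.end(), option) != options.end();
}
// option_requested("fused,convolution", "convolution") == true
```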
bool is_fusion_enabled()
{
if(is_self_decide())
{
return true;
}
return is_requested("fused");
}
bool is_standalone_convs_enabled(context* ctx)
{
if(is_self_decide())
{
if(ctx == nullptr)
{
return false;
}
else
{
const auto& device = ctx->get_current_device();
const std::string navi_family{"gfx110"};
return starts_with(device.get_gfx_name(), navi_family);
}
}
return is_requested("convolution");
}
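The `gfx110` prefix matches the gfx1100/gfx1101/gfx1102 targets, i.e. the Navi 3x (RDNA3) family that this PR's branch enables in CI.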
} // namespace
#endif
#endif // MIGRAPHX_MLIR
void fuse_mlir::apply(module_pass_manager& mpm) const
{
#ifdef MIGRAPHX_MLIR
match::find_matches(mpm, find_mlir_op{});
if(is_fusion_enabled())
{
match::find_matches(mpm, find_mlir_fused_ops{});
}
if(is_standalone_convs_enabled(this->ctx))
{
match::find_matches(mpm, find_mlir_standalone_convolution_op{});
}
#else
(void)mpm;
#endif
......
......@@ -55,7 +55,7 @@ bool is_device_ptr(const void* ptr)
auto status = hipPointerGetAttributes(&attr, ptr);
if(status != hipSuccess)
return false;
return attr.memoryType == hipMemoryTypeDevice;
return attr.type == hipMemoryTypeDevice;
}
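`hipPointerAttribute_t::memoryType` was renamed to `type` in newer HIP releases; this updates the device-pointer check accordingly.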
std::size_t get_available_gpu_memory()
......
......@@ -84,8 +84,10 @@ struct miopen_convolution
{
check_shapes{inputs, op}.has(4);
std::vector<shape> conv_inputs(inputs.begin(), inputs.begin() + 2);
check_shapes{conv_inputs, *this}.max_ndims(5).packed_layouts(
{{0, 1, 2}, {0, 1, 2, 3}, {0, 2, 3, 1}, {0, 1, 2, 3, 4}});
check_shapes{conv_inputs, *this}
.max_ndims(5)
.packed_layouts({{0, 1, 2}, {0, 1, 2, 3}, {0, 2, 3, 1}, {0, 1, 2, 3, 4}})
.same_layout();
return migraphx::compute_shape<Op>(op, conv_inputs);
}
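The added `same_layout()` check requires the input and weight shapes to share the same stride ordering (e.g. both NCHW or both NHWC); the new `same_layout_fail`/`same_layout_pass` tests below exercise it.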
......
......@@ -22,6 +22,7 @@
* THE SOFTWARE.
*/
#include "migraphx/make_op.hpp"
#include <migraphx/stringutils.hpp>
#include <migraphx/gpu/mlir.hpp>
#ifdef MIGRAPHX_MLIR
......@@ -69,6 +70,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_MLIR);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_DB);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_MLIR_TUNING_CFG);
......@@ -93,6 +95,8 @@ struct mlir_handle
friend bool operator==(ptr x, ptr y) { return x.get_value() == y.get_value(); }
friend bool operator!=(ptr x, ptr y) { return not(x == y); }
explicit operator bool() const noexcept { return obj != ptr(); }
T obj{};
};
......@@ -645,8 +649,8 @@ struct mlir_program
void set_gpu_properties(const context& migraphx_ctx)
{
const auto& device = migraphx_ctx.get_current_device();
target_arch = device.get_device_name();
num_cu = device.get_cu_count();
target_arch = device.get_device_name();
num_cu = device.get_cu_count();
}
std::pair<std::size_t, std::size_t> get_launch_params() const
......@@ -682,8 +686,10 @@ struct mlir_program
{
tuning_config tc;
run_high_level_pipeline();
mlir_tuning_space params{
mlirRockTuningSpaceCreate(mmodule.get(), RocmlirTuningParamSetKindFull)};
auto tuning_mode = RocmlirTuningParamSetKindFull;
if(enabled(MIGRAPHX_MLIR_TUNE_EXHAUSTIVE{}))
tuning_mode = RocmlirTuningParamSetKindExhaustive;
mlir_tuning_space params{mlirRockTuningSpaceCreate(mmodule.get(), tuning_mode)};
for(auto i : range(mlirRockTuningGetNumParams(params.get())))
{
mlir_tuning_param param{mlirRockTuningParamCreate()};
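With this change, setting `MIGRAPHX_MLIR_TUNE_EXHAUSTIVE` switches the rocMLIR tuning space from the full parameter set to the exhaustive one.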
......@@ -717,7 +723,8 @@ struct mlir_program
if(not tuning_cfg_path.empty())
{
std::vector<std::string> tokens = split_string(prob_config, '\t');
std::string prob = tokens[1];
std::string prob = tokens[2];
if(starts_with(prob, "conv"))
{
tuning_cfg_path += ".conv";
......@@ -727,6 +734,8 @@ struct mlir_program
tuning_cfg_path += ".gemm";
}
std::ofstream tuning_cfg(tuning_cfg_path, std::ios::app);
prob =
trim(prob, [](unsigned char c) { return (c == '\0') or (std::isspace(c) != 0); });
tuning_cfg << prob << std::endl;
}
}
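The problem string is now read from the third tab-separated token rather than the second, presumably tracking a change in rocMLIR's tuning-key format, and is trimmed of NUL bytes and whitespace before being appended to the tuning config file.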
......@@ -867,15 +876,22 @@ code_object_op compile_mlir(const context& migraphx_ctx,
adjust_param_shapes(m, to_shapes(inputs));
const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
static std::mutex mutex;
if(trace)
{
const std::lock_guard<std::mutex> lock(mutex);
std::cout << m << std::endl;
}
mlir_program mp;
mp.set_gpu_properties(migraphx_ctx);
mp.parse(m);
auto mod_op = mlirModuleGetOperation(mp.mmodule.get());
if(trace)
{
const std::lock_guard<std::mutex> lock(mutex);
std::cout << mlir_print(&mlirOperationPrint, mod_op) << std::endl;
}
auto co = mp.compile(solution);
co.expected_inputs = to_shapes(inputs);
co.output = m.get_output_shapes().front();
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifdef __HIP_DEVICE_COMPILE__
#error \
"Device compilation not allowed for migraphx_gpu. Do not link with hip::device. Device code should go into migraphx_device or migraphx_kernels"
#endif
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......
......@@ -177,6 +177,7 @@ add_dependencies(check test_tf)
add_subdirectory(api)
add_subdirectory(verify)
add_subdirectory(ref)
if(MIGRAPHX_ENABLE_PYTHON)
add_subdirectory(py)
......
......@@ -31,24 +31,39 @@
using migraphx::shape;
bool create_shapes(bool dynamic_allowed)
void create_shapes(bool dynamic_allowed)
{
try
{
shape a{shape::int64_type, {3}};
shape b{shape::float_type, {{3, 6}, {4, 4}}};
auto op = migraphx::make_op("add");
migraphx::check_shapes{{a, b}, op, dynamic_allowed}.has(2);
return true;
}
catch(...)
{
return false;
}
shape a{shape::int64_type, {3}};
shape b{shape::float_type, {{3, 6}, {4, 4}}};
migraphx::check_shapes{{a, b}, "", dynamic_allowed}.has(2);
}
TEST_CASE(allow_dynamic_shape) { EXPECT(create_shapes(true)); }
TEST_CASE(allow_dynamic_shape)
{
EXPECT(not test::throws([] { create_shapes(true); }));
}
TEST_CASE(fail_dynamic_shape)
{
EXPECT(test::throws([] { create_shapes(false); }));
}
TEST_CASE(fail_dynamic_shape) { EXPECT(not create_shapes(false)); }
TEST_CASE(same_layout_fail)
{
EXPECT(test::throws([] {
shape a{shape::float_type, {2, 3}};
shape b{shape::float_type, {2, 3}, {1, 2}};
migraphx::check_shapes{{a, b}, ""}.same_layout();
}));
}
TEST_CASE(same_layout_pass)
{
EXPECT(not test::throws([] {
shape a{shape::float_type, {2, 3}, {1, 2}};
shape b{shape::float_type, {2, 3}, {1, 2}};
migraphx::check_shapes{{a, b}, ""}.same_layout();
}));
}
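Concretely: in `same_layout_fail`, `a` is row-major (default strides {3, 1}) while `b` is column-major (strides {1, 2}); the lens match but the stride ordering differs, so `same_layout()` throws.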
int main(int argc, const char* argv[]) { test::run(argc, argv); }
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/common_dims.hpp>
#include <test.hpp>
using axes_map = std::vector<std::vector<std::size_t>>;
TEST_CASE(common_d1_less)
{
auto cd = migraphx::common_dims::compute({2, 32, 40, 8}, {2, 1280, 8});
EXPECT(cd.dims == std::vector<std::size_t>{2, 32, 40, 8});
EXPECT(cd.axes_map1 == axes_map{{0}, {1}, {2}, {3}});
EXPECT(cd.axes_map2 == axes_map{{0}, {1, 2}, {3}});
}
TEST_CASE(common1)
{
auto cd = migraphx::common_dims::compute({2, 32, 2560}, {2, 1280, 8, 8});
EXPECT(cd.dims == std::vector<std::size_t>{2, 32, 40, 8, 8});
EXPECT(cd.axes_map1 == axes_map{{0}, {1}, {2, 3, 4}});
EXPECT(cd.axes_map2 == axes_map{{0}, {1, 2}, {3}, {4}});
}
TEST_CASE(common2)
{
auto cd = migraphx::common_dims::compute({2, 1280, 8, 8}, {2, 32, 2560});
EXPECT(cd.dims == std::vector<std::size_t>{2, 32, 40, 8, 8});
EXPECT(cd.axes_map1 == axes_map{{0}, {1, 2}, {3}, {4}});
EXPECT(cd.axes_map2 == axes_map{{0}, {1}, {2, 3, 4}});
}
TEST_CASE(common_error1)
{
auto cd = migraphx::common_dims::compute({6, 35}, {3, 7, 2, 5});
EXPECT(cd.dims.empty());
}
TEST_CASE(common_error2)
{
auto cd = migraphx::common_dims::compute({3, 7, 2, 5}, {6, 35});
EXPECT(cd.dims.empty());
}
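Both error cases fail for the same reason: 6 * 35 == 3 * 7 * 2 * 5 == 210, but no grouping of consecutive factors of {3, 7, 2, 5} multiplies to 6 (the smallest leading group is already 3 * 7 == 21), so no common axis decomposition exists and `compute` returns empty dims.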
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -196,15 +196,47 @@ TEST_CASE(contiguous_pointwise)
migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
auto yc = mm->add_instruction(migraphx::make_op("contiguous"), yb);
auto add = add_pointwise(p, "main:pointwise0", {x, yc}, single_pointwise("add"));
mm->add_instruction(pass_op{}, add);
auto cadd = mm->add_instruction(migraphx::make_op("contiguous"), add);
mm->add_instruction(pass_op{}, cadd);
}
auto count = std::distance(mm->begin(), mm->end());
run_pass(*mm);
EXPECT(std::distance(mm->begin(), mm->end()) == (count - 1));
EXPECT(std::distance(mm->begin(), mm->end()) == (count - 2));
EXPECT(std::none_of(
mm->begin(), mm->end(), [](auto&& ins) { return ins.name() == "contiguous"; }));
}
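The expected instruction count drops by two because the pass now removes both the `contiguous` feeding the pointwise op and the newly added one following it, as the trailing `none_of` check confirms.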
TEST_CASE(contiguous_nhwc_pointwise)
{
auto s =
migraphx::shape::from_permutation(migraphx::shape::float_type, {2, 3, 8, 8}, {0, 2, 3, 1});
migraphx::program p1;
{
auto* mm = p1.get_main_module();
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", migraphx::shape{migraphx::shape::float_type, {3}});
auto yb = mm->add_instruction(
migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
auto yc = mm->add_instruction(migraphx::make_op("contiguous"), yb);
auto add = add_pointwise(p1, "main:pointwise0", {x, yc}, single_pointwise("add"));
auto cadd = mm->add_instruction(migraphx::make_op("contiguous"), add);
mm->add_instruction(pass_op{}, cadd);
}
run_pass(*p1.get_main_module());
migraphx::program p2;
{
auto* mm = p2.get_main_module();
auto x = mm->add_parameter("x", s);
auto y = mm->add_parameter("y", migraphx::shape{migraphx::shape::float_type, {3}});
auto yb = mm->add_instruction(
migraphx::make_op("broadcast", {{"axis", 1}, {"out_lens", {2, 3, 8, 8}}}), y);
auto add = add_pointwise(p2, "main:pointwise0", {x, yb}, single_pointwise("add"));
auto cadd = mm->add_instruction(migraphx::make_op("contiguous"), add);
mm->add_instruction(pass_op{}, cadd);
}
EXPECT(p1 == p2);
}
TEST_CASE(slice_contiguous)
{
migraphx::module m;
......
......@@ -27,7 +27,7 @@
#include <migraphx/pass_manager.hpp>
#include <migraphx/instruction.hpp>
#include <basic_ops.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/op/common.hpp>
#include <migraphx/make_op.hpp>
#include <test.hpp>
......@@ -58,9 +58,8 @@ create_conv(migraphx::instruction_ref& l_img,
migraphx::shape s_weights{migraphx::shape::int32_type, {4, channels, 3, 3}};
std::vector<int32_t> weights(4 * channels * 3 * 3);
auto l_weights = m.add_literal(migraphx::literal{s_weights, weights});
migraphx::op::convolution op;
op.padding_mode = padding_mode;
return m.add_instruction(op, l_img, l_weights);
return m.add_instruction(
migraphx::make_op("convolution", {{"padding_mode", padding_mode}}), l_img, l_weights);
}
TEST_CASE(rewrite_pad)
......
......@@ -112,7 +112,10 @@ TEST_CASE_REGISTER(test_limits<double, int>);
TEST_CASE_REGISTER(test_limits<double, migraphx::half>);
TEST_CASE_REGISTER(test_limits<float, int>);
TEST_CASE_REGISTER(test_limits<int, migraphx::half>);
#ifndef _WIN32
// On Windows (LLP64), int and long are both 32 bits, so they have the same
// min and max values and the test would be redundant.
TEST_CASE_REGISTER(test_limits<long, int>);
#endif
TEST_CASE_REGISTER(test_limits<long, char>);
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -21,8 +21,9 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/fuse_pointwise.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/eliminate_contiguous.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/program.hpp>
......@@ -361,4 +362,154 @@ TEST_CASE(no_input)
EXPECT(p == p2);
}
TEST_CASE(add_reshape_add)
{
migraphx::shape s1{migraphx::shape::float_type, {3, 10, 16}};
migraphx::shape s2{migraphx::shape::float_type, {3, 40, 2, 2}};
migraphx::shape s3{migraphx::shape::float_type, {3, 10, 4, 2, 2}};
migraphx::program p1;
{
auto* mm = p1.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
auto reshape =
mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), add1);
auto add2 = mm->add_instruction(migraphx::make_op("add"), reshape, z);
mm->add_return({add2});
}
run_pass(p1);
migraphx::program p2;
{
auto* mm = p2.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto x2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), x);
auto y2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), y);
auto z2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), z);
auto fadd =
add_pointwise(p2, "main:pointwise0", {x2, y2, z2}, [=](auto* pm, const auto& inputs) {
auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
});
auto reshape =
mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), fadd);
mm->add_return({reshape});
}
EXPECT(p1.sort() == p2.sort());
}
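The pass can move the adds across the reshape here because a common super-shape exists: s3 = {3, 10, 4, 2, 2} refines both s1 = {3, 10, 16} (16 == 4 * 2 * 2) and s2 = {3, 40, 2, 2} (40 == 10 * 4). Every input is reshaped to s3, the adds are fused into one pointwise module, and a single reshape to s2 is emitted afterwards; this is the `common_dims` machinery tested above.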
TEST_CASE(add_reshape_add_nonstandard)
{
migraphx::shape s1 =
migraphx::shape::from_permutation(migraphx::shape::float_type, {3, 10, 16}, {2, 0, 1});
migraphx::shape s2{migraphx::shape::float_type, {3, 40, 2, 2}};
migraphx::shape s3{migraphx::shape::float_type, {3, 10, 4, 2, 2}};
migraphx::program p1;
{
auto* mm = p1.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
auto c = mm->add_instruction(migraphx::make_op("contiguous"), add1);
auto reshape = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), c);
auto add2 = mm->add_instruction(migraphx::make_op("add"), reshape, z);
mm->add_return({add2});
}
run_pass(p1);
migraphx::program p2;
{
auto* mm = p2.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto cx = mm->add_instruction(migraphx::make_op("contiguous"), x);
auto cy = mm->add_instruction(migraphx::make_op("contiguous"), y);
auto x2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), cx);
auto y2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), cy);
auto z2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s3.lens()}}), z);
auto fadd =
add_pointwise(p2, "main:pointwise0", {x2, y2, z2}, [=](auto* pm, const auto& inputs) {
auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
});
auto reshape =
mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), fadd);
mm->add_return({reshape});
}
EXPECT(p1.sort() == p2.sort());
}
TEST_CASE(add_unsqueeze_add_nonstandard)
{
migraphx::shape s1 =
migraphx::shape::from_permutation(migraphx::shape::float_type, {3, 10, 16}, {2, 0, 1});
migraphx::shape s2{migraphx::shape::float_type, {3, 10, 1, 16}};
migraphx::program p1;
{
auto* mm = p1.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
auto unsqueeze = mm->add_instruction(migraphx::make_op("unsqueeze", {{"axes", {2}}}), add1);
auto add2 = mm->add_instruction(migraphx::make_op("add"), unsqueeze, z);
mm->add_return({add2});
}
run_pass(p1);
migraphx::program p2;
{
auto* mm = p2.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto cx = mm->add_instruction(migraphx::make_op("contiguous"), x);
auto cy = mm->add_instruction(migraphx::make_op("contiguous"), y);
auto x2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), cx);
auto y2 = mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), cy);
auto fadd =
add_pointwise(p2, "main:pointwise0", {x2, y2, z}, [=](auto* pm, const auto& inputs) {
auto add1 = pm->add_instruction(migraphx::make_op("add"), inputs[0], inputs[1]);
return pm->add_instruction(migraphx::make_op("add"), add1, inputs[2]);
});
mm->add_return({fadd});
}
EXPECT(p1.sort() == p2.sort());
}
TEST_CASE(add_reshape_add_error)
{
migraphx::shape s1{migraphx::shape::float_type, {6, 35}};
migraphx::shape s2{migraphx::shape::float_type, {3, 7, 2, 5}};
migraphx::program p1;
{
auto* mm = p1.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto add1 = mm->add_instruction(migraphx::make_op("add"), x, y);
auto reshape =
mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), add1);
auto add2 = mm->add_instruction(migraphx::make_op("add"), reshape, z);
mm->add_return({add2});
}
run_pass(p1);
migraphx::program p2;
{
auto* mm = p2.get_main_module();
auto x = mm->add_parameter("x", s1);
auto y = mm->add_parameter("y", s1);
auto z = mm->add_parameter("z", s2);
auto fadd1 = add_pointwise(p2, "main:pointwise0", {x, y}, single_pointwise("add"));
auto reshape =
mm->add_instruction(migraphx::make_op("reshape", {{"dims", s2.lens()}}), fadd1);
auto fadd2 = add_pointwise(p2, "main:pointwise1", {reshape, z}, single_pointwise("add"));
mm->add_return({fadd2});
}
EXPECT(p1.sort() == p2.sort());
}
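`add_reshape_add_error` is the negative case: {6, 35} and {3, 7, 2, 5} have no common dims (the same incompatibility exercised by `common_error1`/`common_error2` above), so the reshape stays put and the two adds are fused independently on either side of it.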
int main(int argc, const char* argv[]) { test::run(argc, argv); }
......@@ -24,7 +24,7 @@
#include <iostream>
#include <vector>
#include <migraphx/gpu/fuse_mlir.hpp>
#include <migraphx/operators.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/quantization.hpp>
#include <migraphx/generate.hpp>
......@@ -90,7 +90,7 @@ TEST_CASE(int8_quantization)
migraphx::shape sc{migraphx::shape::float_type, {5, 8}};
auto pa = mm->add_parameter("a", sa);
auto pb = mm->add_parameter("b", sb);
mm->add_instruction(migraphx::op::dot{}, pa, pb);
mm->add_instruction(migraphx::make_op("dot"), pa, pb);
return p;
};
......