Merge branch 'simplify_1_mul_div_ops' into divide_by_zero_check

2ba401f0 · Ted Themistokleous · GitHub · a330d428 · 8398fb19 · 2ba401f0
Unverified Commit 2ba401f0 authored Jul 28, 2022 by Ted Themistokleous Committed by GitHub Jul 28, 2022
20 changed files
--- a/src/targets/fpga/include/migraphx/fpga/subgraph.hpp
+++ b/src/targets/fpga/include/migraphx/fpga/subgraph.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
+#define MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
+#include <migraphx/program.hpp>
+#include <migraphx/config.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace fpga {
+struct subgraph // pass object for the FPGA target: exposes a name and an apply() over a module_pass_manager
+{
+    std::string name() const { return "fpga::subgraph"; } // identifier string reported for this pass
+    void apply(module_pass_manager& mpm) const; // declared only; the definition is out of line
+};
+} // namespace fpga
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
--- a/src/targets/fpga/include/migraphx/fpga/target.hpp
+++ b/src/targets/fpga/include/migraphx/fpga/target.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_FPGA_TARGET_HPP
+#define MIGRAPHX_GUARD_FPGA_TARGET_HPP
+#include <migraphx/program.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/compile_options.hpp>
+#include <migraphx/fpga/context.hpp>
+#include <migraphx/config.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+struct pass;
+namespace fpga {
+struct target // FPGA target description: name, pass list, context and argument helpers
+{
+    std::string name() const; // defined out of line
+    std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const; // pass list for this target; defined out of line
+    migraphx::context get_context() const { return context{}; } // wraps a default-constructed `context`
+    float is_supported(instruction_ref ins, support_metric m); // NOTE(review): the only non-const member of this struct - confirm that is intended
+    argument copy_to(const argument& arg) const { return arg; } // returns the argument unchanged
+    argument copy_from(const argument& arg) const { return arg; } // returns the argument unchanged
+    argument allocate(const shape& s) const; // defined out of line
+};
+MIGRAPHX_REGISTER_TARGET(target);
+} // namespace fpga
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_FPGA_TARGET_HPP
--- a/src/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp
+++ b/src/targets/fpga/include/migraphx/fpga/vitis_ai_adapter.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
+#define MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
+#include <string>
+#include <migraphx/instruction.hpp>
+#include <migraphx/pass_manager.hpp>
+namespace vitis_ai { // adapter interface; only declarations live in this header
+class x_model // value type that stores one migraphx::shape behind a getter and a setter
+{
+    migraphx::shape shape; // private by default (class member declared before `public:`)
+    public:
+    migraphx::shape get_shape() const; // returns the shape by value
+    void set_shape(migraphx::shape); // takes the new shape by value
+};
+x_model create_xmodel(migraphx::module_ref mod); // builds an x_model from a module reference
+migraphx::argument execute(const x_model& xmodel, // model to execute
+                           const migraphx::shape& output_shape, // shape passed through for the result
+                           std::vector<migraphx::argument>& args); // input arguments, taken by non-const reference
+} // namespace vitis_ai
+#endif // MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
--- a/src/targets/fpga/lowering.cpp
+++ b/src/targets/fpga/lowering.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/fpga/lowering.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/register_op.hpp>
+#include <migraphx/stringutils.hpp>
+#include <iostream>
+#include "migraphx/fpga/vitis_ai_adapter.hpp"
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace fpga {
+// Operator named "fpga::vitis_ai": stores a vitis_ai::x_model and forwards execution to
+// the vitis_ai adapter.
+struct fpga_vitis_op
+{
+    vitis_ai::x_model xmodel;
+    int dummy = 0;
+    fpga_vitis_op() = default;
+    explicit fpga_vitis_op(vitis_ai::x_model model) : xmodel(std::move(model)) {}
+    // Only the `dummy` field is reflected; the stored model is not exposed through reflect().
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.dummy, "dummy"));
+    }
+    std::string name() const { return "fpga::vitis_ai"; }
+    // The output shape is taken from the stored model; the input shapes are not consulted.
+    shape compute_shape(const std::vector<shape>&) const { return xmodel.get_shape(); }
+    // Prints the context id, then delegates to vitis_ai::execute with the stored model.
+    argument
+    compute(const context& ctx, const shape& output_shape, std::vector<argument> args) const
+    {
+        std::cout << "The context is " << ctx.id << std::endl;
+        return ::vitis_ai::execute(xmodel, output_shape, args);
+    }
+};
+MIGRAPHX_REGISTER_OP(fpga_vitis_op)
+// Replaces each "fpga::vitis_placeholder" instruction in the module with an fpga_vitis_op
+// built from the placeholder's single submodule, keeping the placeholder's inputs.
+void lowering::apply(module& m) const
+{
+    // test modifying the context from a pass
+    ctx->id = 2;
+    for(auto ins : iterator_for(m))
+    {
+        if(ins->name() != "fpga::vitis_placeholder")
+            continue;
+        assert(ins->module_inputs().size() == 1);
+        auto submodule = ins->module_inputs()[0];
+        m.replace_instruction(
+            ins, fpga_vitis_op{::vitis_ai::create_xmodel(submodule)}, ins->inputs());
+    }
+}
+} // namespace fpga
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/fpga/subgraph.cpp
+++ b/src/targets/fpga/subgraph.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/fpga/subgraph.hpp>
+#include <migraphx/instruction.hpp>
+#include "migraphx/iterator.hpp"
+#include <migraphx/iterator_for.hpp>
+#include "migraphx/make_op.hpp"
+#include "migraphx/module.hpp"
+#include "migraphx/ranges.hpp"
+#include <migraphx/register_op.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/pass_manager.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace fpga {
+// Stand-in operator named "fpga::vitis_placeholder": it carries one submodule and reports
+// that submodule's only output shape as its own.
+struct fpga_placeholder_op
+{
+    int dummy = 0;
+    fpga_placeholder_op() = default;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.dummy, "dummy"));
+    }
+    std::string name() const { return "fpga::vitis_placeholder"; }
+    // Throws unless exactly one submodule with exactly one output shape is supplied.
+    // The input shapes are not consulted.
+    shape compute_shape(const std::vector<shape>&, std::vector<module_ref> mods) const
+    {
+        if(mods.size() != 1)
+            MIGRAPHX_THROW("should have one submodule.");
+        const auto out_shapes = mods.front()->get_output_shapes();
+        if(out_shapes.size() != 1)
+            MIGRAPHX_THROW("Only one return");
+        return out_shapes.front();
+    }
+};
+MIGRAPHX_REGISTER_OP(fpga_placeholder_op)
+// Returns true for instructions treated as FPGA work: the name must not start with "@"
+// and the instruction must have at least one input.
+bool is_fpga_instr(migraphx::instruction_ref it)
+{
+    // names starting with "@" (e.g. @param, @literal) are never FPGA instructions
+    const bool at_prefixed = migraphx::starts_with(it->name(), "@");
+    // no inputs to the instr means it's input data
+    const bool has_inputs = !it->inputs().empty();
+    return !at_prefixed && has_inputs;
+}
+// Copies the module's FPGA instruction range into a new bypass submodule "<name>:fpga" and
+// makes the module return a single "fpga::vitis_placeholder" instruction that references
+// that submodule. Every @param/@literal instruction becomes an input of the placeholder.
+// Leaves the module untouched when it contains no FPGA instruction, and throws when it has
+// no @return instruction to anchor the placeholder on.
+void subgraph::apply(module_pass_manager& mpm) const
+{
+    auto& mod = mpm.get_module();
+    // Both ends start at mod.end() so neither iterator is ever read uninitialized
+    migraphx::instruction_ref first = mod.end();
+    migraphx::instruction_ref last  = mod.end();
+    std::vector<migraphx::instruction_ref> literal_inputs;
+    for(auto it : iterator_for(mod))
+    {
+        // assuming we want all the params/literals as inputs to the FPGA submodule
+        if(migraphx::starts_with(it->name(), "@param") ||
+           migraphx::starts_with(it->name(), "@literal"))
+        {
+            literal_inputs.push_back(it);
+        }
+        if(is_fpga_instr(it))
+        {
+            if(first == mod.end())
+            {
+                first = it;
+            }
+            last = it;
+        }
+    }
+    // Nothing to offload: do not create an empty submodule or copy an invalid range
+    if(first == mod.end())
+        return;
+    // Find the return before mutating anything so a failure leaves the module unchanged
+    migraphx::instruction_ref return_ins = mod.end();
+    for(auto it : iterator_for(mod))
+    {
+        if(migraphx::starts_with(it->name(), "@return"))
+        {
+            return_ins = it;
+            break;
+        }
+    }
+    if(return_ins == mod.end())
+        MIGRAPHX_THROW("fpga::subgraph: module has no @return instruction");
+    auto* pm = mpm.create_module(mod.name() + ":fpga");
+    pm->set_bypass();
+    // TODO(varunsh): this code may be replaceable by code in the fuse_pointwise pass
+    // assuming all FPGA instructions are in one contiguous range
+    // NOTE(review): `last` is the final FPGA instruction itself; if insert_instructions
+    // treats [first, last) as half-open, that instruction is not copied - confirm.
+    pm->insert_instructions(pm->end(), first, last, {});
+    auto placeholder_ins = mod.insert_instruction(
+        return_ins, migraphx::make_op("fpga::vitis_placeholder"), literal_inputs, {pm});
+    mod.replace_return({placeholder_ins});
+}
+} // namespace fpga
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/fpga/target.cpp
+++ b/src/targets/fpga/target.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/fpga/target.hpp>
+#include <migraphx/fpga/lowering.hpp>
+#include <migraphx/fpga/subgraph.hpp>
+#include <migraphx/register_target.hpp>
+#include <migraphx/pass.hpp>
+#include <migraphx/auto_contiguous.hpp>
+#include <migraphx/rewrite_rnn.hpp>
+#include <migraphx/eliminate_pad.hpp>
+#include <migraphx/insert_pad.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/normalize_ops.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace fpga {
+// Name under which this target is identified.
+std::string target::name() const
+{
+    return "fpga";
+}
+// Builds the ordered pass pipeline for the FPGA target. The generic context is cast to the
+// FPGA context so the lowering pass can be handed a pointer to it.
+std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options&) const
+{
+    auto& fpga_ctx = any_cast<context>(gctx);
+    // not sure if all these passes are needed but they were copied from ref/
+    std::vector<pass> pipeline = {normalize_ops{},
+                                  eliminate_pad{},
+                                  dead_code_elimination{},
+                                  insert_pad{},
+                                  dead_code_elimination{},
+                                  rewrite_rnn{},
+                                  dead_code_elimination{},
+                                  auto_contiguous{},
+                                  dead_code_elimination{},
+                                  subgraph{},
+                                  dead_code_elimination{},
+                                  lowering{&fpga_ctx},
+                                  dead_code_elimination{}};
+    return pipeline;
+}
+// Produces an argument for shape `s` by delegating to fill_argument(s, 0).
+argument target::allocate(const shape& s) const
+{
+    return fill_argument(s, 0);
+}
+// Support score of this target for an instruction under the given metric.
+// Qualified with target:: so that it defines the member declared in
+// migraphx/fpga/target.hpp; an unqualified definition would only create an unrelated free
+// function and leave the member without a definition.
+// Currently always returns 1.0 regardless of the arguments.
+float target::is_supported(instruction_ref ins, support_metric m)
+{
+    // for now, not using the ins and metric to return a value
+    (void)ins;
+    (void)m;
+    return 1.0f;
+}
+MIGRAPHX_REGISTER_TARGET(target);
+} // namespace fpga
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/fpga/vitis_ai_adapter.cpp
+++ b/src/targets/fpga/vitis_ai_adapter.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "migraphx/fpga/vitis_ai_adapter.hpp"
+#include "migraphx/module.hpp"
+#include "migraphx/stringutils.hpp"
+namespace vitis_ai {
+// Returns the stored shape by value.
+migraphx::shape x_model::get_shape() const
+{
+    return shape;
+}
+// Replaces the stored shape with the one passed in.
+void x_model::set_shape(migraphx::shape new_shape)
+{
+    shape = new_shape;
+}
+// Logs the call and returns an x_model whose shape is set from the module's output shapes.
+// NOTE(review): set_shape takes a single migraphx::shape while get_output_shapes() is used
+// as a container elsewhere (size()/front()); this relies on an implicit conversion - confirm
+// the resulting shape is the one intended.
+x_model create_xmodel(const migraphx::module_ref mod)
+{
+    std::cout << "Calling an external function: create_xmodel!\n";
+    x_model model;
+    model.set_shape(mod->get_output_shapes());
+    return model;
+}
+// Stub execution: logs the output shape, the argument count and each argument's shape,
+// then returns an argument constructed from output_shape. The model is not used.
+migraphx::argument execute(const x_model&,
+                           const migraphx::shape& output_shape,
+                           std::vector<migraphx::argument>& args)
+{
+    std::cout << "Calling an external function: execute!\n";
+    std::cout << "Output Shape: " << output_shape << std::endl;
+    std::cout << "Args: " << args.size() << std::endl;
+    for(auto arg_it = args.cbegin(); arg_it != args.cend(); ++arg_it)
+        std::cout << "  " << arg_it->get_shape() << std::endl;
+    std::cout << std::endl;
+    return migraphx::argument{output_shape};
+}
+} // namespace vitis_ai
--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -164,6 +164,7 @@ add_library(migraphx_gpu
    deconvolution.cpp
    device_name.cpp
    elu.cpp
+    fuse_mlir.cpp
    fuse_ops.cpp
    gather.cpp
    gemm_impl.cpp
@@ -176,7 +177,7 @@ add_library(migraphx_gpu
    loop.cpp
    lrn.cpp
    leaky_relu.cpp
-    mlir_conv.cpp
+    mlir.cpp
    multinomial.cpp
    nonzero.cpp
    pack_args.cpp
@@ -320,16 +321,26 @@ message(STATUS "extractkernel: ${MIGRAPHX_EXTRACT_KERNEL}")
 set(MIGRAPHX_ENABLE_MLIR OFF CACHE BOOL "")
 if(MIGRAPHX_ENABLE_MLIR)
-    find_library(LIBMLIRMIOPEN MLIRMIOpenThin REQUIRED)
+    find_library(MLIRAPI_LIBRARY MLIRMIOpen 
+        PATH_SUFFIXES
+        # Workaround for broken MLIR install
+        lib/ lib/lib)
    # REQUIRED is not supported before cmake 3.18
-    if(NOT LIBMLIRMIOPEN)
+    if(NOT MLIRAPI_LIBRARY)
-        message(FATAL_ERROR "libMLIRMIOpenThin not found")
+        message(FATAL_ERROR "libMLIRMIOpen not found")
    else()
-        message(STATUS "Build with libMLIRMIOpenThin: " ${LIBMLIRMIOPEN})
+        message(STATUS "Build with libMLIRMIOpen: " ${MLIRAPI_LIBRARY})
    endif()
-    target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR_MIOPEN_SUPPORT")
+    find_path(MLIRAPI_HEADERS NAMES mlir-c/Dialect/MIGraphX.h)
-    target_link_libraries(migraphx_gpu PUBLIC ${LIBMLIRMIOPEN})
+    # Workaround MLIR broken installation
+    find_path(MLIRAPI_HEADERS2 NAMES mlir-c/Registration.h
+        PATH_SUFFIXES 
+        include/external/include external/include)
+    target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
+    target_include_directories(migraphx_gpu SYSTEM PRIVATE ${MLIRAPI_HEADERS} ${MLIRAPI_HEADERS2})
+    target_link_libraries(migraphx_gpu PUBLIC ${MLIRAPI_LIBRARY})
 endif()
 set(MIGRAPHX_USE_HIPRTC OFF CACHE BOOL "")

--- a/src/targets/gpu/code_object_op.cpp
+++ b/src/targets/gpu/code_object_op.cpp
@@ -52,7 +52,7 @@ code_object_op::compute(context& ctx, const shape&, const std::vector<argument>&
    std::transform(
        args.begin(), args.end(), kargs.begin(), [](const argument& a) { return a.data(); });
    k.launch(ctx.get_stream().get(), global, local, std::move(kargs));
-    return args.back();
+    return args[get_output_arg(args.size())];
 }
 void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
 {

--- a/src/targets/gpu/compile_gen.cpp
+++ b/src/targets/gpu/compile_gen.cpp
@@ -43,6 +43,9 @@ static std::vector<std::size_t> vector_sizes(const std::vector<shape>& inputs)
 vectorize vectorize::elements(std::size_t axis, const std::vector<shape>& inputs)
 {
+    if(std::all_of(
+           inputs.begin(), inputs.end(), [&](const auto& s) { return s.lens()[axis] == 1; }))
+        return {1, axis};
    auto sizes = vector_sizes(inputs);
    std::vector<std::size_t> max_vec_size;
    std::transform(inputs.begin(),

--- a/src/targets/gpu/compile_hip.cpp
+++ b/src/targets/gpu/compile_hip.cpp
@@ -43,6 +43,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG_SYM);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);
@@ -227,6 +228,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
    if(params.find("-std=") == std::string::npos)
        params += " --std=c++17";
    params += " -fno-gpu-rdc";
+    if(enabled(MIGRAPHX_GPU_DEBUG_SYM{}))
+        params += " -g";
    params += " -c";
    if(is_hcc_compiler())
    {

--- a/src/targets/gpu/compile_hip_code_object.cpp
+++ b/src/targets/gpu/compile_hip_code_object.cpp
@@ -51,9 +51,9 @@ static const char* const make_tensor_template = R"__migraphx__(
 template<>
 struct make_tensor<${n}>
 {
-    static __device__ auto apply(void* p)
+    static __device__ auto apply(void* __restrict__ p)
    {
-        return make_tensor_view(reinterpret_cast<${type}*>(p), make_shape(${lens}, ${strides}));
+        return make_tensor_view(reinterpret_cast<${type}* __restrict__>(p), make_shape(${lens}, ${strides}));
    }
 };
 )__migraphx__";

--- a/src/targets/gpu/deconvolution.cpp
+++ b/src/targets/gpu/deconvolution.cpp
@@ -59,31 +59,30 @@ argument miopen_deconvolution::compute(context& ctx,
    auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));
-    float alpha = 1;
+    if(solution_id == 0)
-    float beta  = 0;
+        MIGRAPHX_THROW("MIOpen Deconvolution: invalid solution ID");
-    auto status = miopenConvolutionForward(ctx.get_stream().get_miopen(),
-                                           &alpha,
+    auto status = miopenConvolutionForwardImmediate(ctx.get_stream().get_miopen(),
-                                           x_desc.get(),
+                                                    w_desc.get(),
-                                           args[0].implicit(),
+                                                    args[1].implicit(),
-                                           w_desc.get(),
+                                                    x_desc.get(),
-                                           args[1].implicit(),
+                                                    args[0].implicit(),
-                                           cd.get(),
+                                                    cd.get(),
-                                           algo,
+                                                    y_desc.get(),
-                                           &beta,
+                                                    args[3].implicit(),
-                                           y_desc.get(),
+                                                    args[2].implicit(),
-                                           args[3].implicit(),
+                                                    args[2].get_shape().bytes(),
-                                           args[2].implicit(),
+                                                    solution_id);
-                                           args[2].get_shape().bytes());
    if(status != miopenStatusSuccess)
-        MIGRAPHX_THROW("Running deconvolution failed");
+        MIGRAPHX_THROW("MIOpen Deconvolution: running convolution failed");
    return args[3];
 }
-shape miopen_deconvolution::compile(context& ctx,
+shape miopen_deconvolution::find(context& ctx, const shape& output_shape, std::vector<shape> inputs)
-                                    const shape& output_shape,
-                                    std::vector<shape> inputs)
 {
    shape workspace_shape{};
    auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
    auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
    auto y_desc = make_tensor(reshape_if_1d(output_shape));
@@ -119,9 +118,35 @@ shape miopen_deconvolution::compile(context& ctx,
                                                        workspace_size,
                                                        false);
    if(status != miopenStatusSuccess)
-        MIGRAPHX_THROW("Find deconvolution failed");
+        MIGRAPHX_THROW("MIOpen Deconvolution: find convolution failed");
-    handle = ctx.get_stream().get_miopen();
+    algo = perf.fwd_algo;
-    algo   = perf.fwd_algo;
+    size_t solution_count;
+    status = miopenConvolutionForwardGetSolutionCount(ctx.get_stream().get_miopen(),
+                                                      w_desc.get(),
+                                                      x_desc.get(),
+                                                      cd.get(),
+                                                      y_desc.get(),
+                                                      &solution_count);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Deconvolution: get solution count failed");
+    std::vector<miopenConvSolution_t> solutions(solution_count);
+    status = miopenConvolutionForwardGetSolution(ctx.get_stream().get_miopen(),
+                                                 w_desc.get(),
+                                                 x_desc.get(),
+                                                 cd.get(),
+                                                 y_desc.get(),
+                                                 solution_count,
+                                                 &solution_count,
+                                                 solutions.data());
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Deconvolution: get solution failed");
+    solution_id = solutions.front().solution_id;
    return shape{shape::int8_type, {perf.memory}};
 }
@@ -129,13 +154,29 @@ void miopen_deconvolution::finalize(context& ctx,
                                    const shape& output_shape,
                                    std::vector<shape> inputs)
 {
-    if(handle == ctx.get_stream().get_miopen())
+    if(cd == nullptr)
-        return;
+        cd = make_deconv(op);
-    // Check that workspace hasn't changed
+    if(solution_id == 0)
-    auto size = inputs.at(2).bytes();
+    {
-    auto ws   = compile(ctx, output_shape, std::move(inputs));
+        // Check that workspace hasn't changed
-    if(ws.bytes() > size)
+        auto size = inputs.at(2).bytes();
-        MIGRAPHX_THROW("Workspace has changed during finalization.");
+        auto ws   = find(ctx, output_shape, inputs);
+        if(ws.bytes() > size)
+            MIGRAPHX_THROW("MIOpen Deconvolution: workspace has changed during finalization.");
+    }
+    auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
+    auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
+    auto y_desc = make_tensor(reshape_if_1d(output_shape));
+    auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
+                                                          w_desc.get(),
+                                                          x_desc.get(),
+                                                          cd.get(),
+                                                          y_desc.get(),
+                                                          solution_id);
+    if(status != miopenStatusSuccess)
+        MIGRAPHX_THROW("MIOpen Deconvolution: compile solution failed");
 }
 } // namespace gpu

--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/gpu/fuse_mlir.hpp>
+#include <migraphx/gpu/mlir.hpp>
+#include <migraphx/matcher.hpp>
+#include <migraphx/pass_manager.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/register_op.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+struct module;
+namespace gpu {
+#ifdef MIGRAPHX_MLIR
+// Operator named "gpu::mlir_conv": wraps another operation (a convolution by default) and
+// expects exactly one submodule. Its output shape is the wrapped operation's shape for the
+// last two inputs.
+struct mlir_conv
+{
+    operation op = make_op("convolution");
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.op, "op"));
+    }
+    std::string name() const { return "gpu::mlir_conv"; }
+    // Requires standard input shapes, one submodule and at least two inputs.
+    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
+    {
+        check_shapes{inputs, *this}.standard();
+        if(mods.size() != 1)
+        {
+            MIGRAPHX_THROW("should have one submodule.");
+        }
+        if(inputs.size() < 2)
+        {
+            MIGRAPHX_THROW("should have at least two inputs.");
+        }
+        const auto last = inputs.size() - 1;
+        return op.compute_shape({inputs[last - 1], inputs[last]});
+    }
+};
+MIGRAPHX_REGISTER_OP(mlir_conv);
+namespace {
+struct find_conv_pointwise // matcher + rewrite: replaces a pointwise instruction fed by a convolution with one mlir_conv
+{
+    // Find a convolution followed by a pointwise operation.
+    auto matcher() const
+    {
+        auto convolution =
+            match::skip(match::name("contiguous"))(match::name("convolution").bind("convolution"));
+        return match::name("pointwise")(match::any_of[match::inputs()](convolution.bind("x"))); // any input of the pointwise may be the convolution
+    }
+    void apply(module_pass_manager& mpm, const match::matcher_result& r) const
+    {
+        auto ins      = r.result; // the matched pointwise instruction
+        auto conv_ins = r.instructions["convolution"];
+        auto x_ins    = r.instructions["x"]; // input after contiguous
+        auto* pm      = ins->module_inputs().front(); // first submodule of the pointwise instruction
+        auto names    = pm->get_parameter_names();
+        // Whitelist pointwise operators
+        if(std::any_of(pm->begin(), pm->end(), [](const auto& i) {
+               return not contains({"@literal", "@param", "@return", "convolution", "add", "relu"},
+                                   i.name());
+           }))
+            return; // bail out if the submodule holds any instruction outside the list
+        // Only fuse with fp32 for now
+        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
+               return i->get_shape().type() != shape::type_t::float_type;
+           }))
+            return;
+        std::sort(names.begin(), names.end()); // NOTE(review): sorted names are paired positionally with ins->inputs() below - confirm the orders agree
+        module_ref mm = mpm.create_module("mlir_" + pm->name());
+        mm->set_bypass();
+        std::unordered_map<instruction_ref, instruction_ref> param_map; // pm parameter -> replacement instruction in mm
+        auto x    = mm->add_parameter("x" + std::to_string(names.size()),
+                                   conv_ins->inputs().at(0)->get_shape());
+        auto w    = mm->add_parameter("x" + std::to_string(names.size() + 1),
+                                   conv_ins->inputs().at(1)->get_shape());
+        auto conv = mm->add_instruction(conv_ins->get_operator(), {x, w}); // the convolution is rebuilt inside mm on the two new parameters
+        std::transform(names.begin(),
+                       names.end(),
+                       ins->inputs().begin(),
+                       std::inserter(param_map, param_map.end()),
+                       [&](auto name, auto input) {
+                           if(input == x_ins)
+                               return std::make_pair(pm->get_parameter(name), conv); // the conv-fed parameter maps to the rebuilt conv
+                           return std::make_pair(pm->get_parameter(name),
+                                                 mm->add_parameter(name, input->get_shape()));
+                       });
+        mm->add_return(mm->insert_instructions(mm->end(), pm, param_map)); // copy pm's instructions into mm and return their results
+        std::vector<instruction_ref> inputs;
+        std::copy_if(ins->inputs().begin(),
+                     ins->inputs().end(),
+                     std::back_inserter(inputs),
+                     [&](auto input) { return input != conv_ins; }); // NOTE(review): filters on conv_ins while the mapping above compares against x_ins - confirm for the contiguous case
+        inputs.insert(inputs.end(), conv_ins->inputs().begin(), conv_ins->inputs().end()); // the convolution's own inputs go last
+        mpm.get_module().replace_instruction(
+            ins, mlir_conv{conv_ins->get_operator()}, inputs, {mm});
+    }
+};
+} // namespace
+#endif
+void fuse_mlir::apply(module_pass_manager& mpm) const // runs the conv+pointwise rewrite only when MIGRAPHX_MLIR is defined
+{
+#ifdef MIGRAPHX_MLIR
+    match::find_matches(mpm, find_conv_pointwise{}); // apply the matcher defined in this file
+#else
+    (void)mpm; // no-op build: silences the unused-parameter warning
+#endif
+}
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/targets/gpu/fuse_ops.cpp
+++ b/src/targets/gpu/fuse_ops.cpp
@@ -336,6 +336,7 @@ void move_standard_front(std::vector<instruction_ref>& args)
 auto gpu_name(const std::string& s) { return match::name("gpu::" + s); }
+namespace {
 struct find_layernorm
 {
    auto matcher() const { return match::layernorm(&gpu_name); }
@@ -836,15 +837,6 @@ inline auto precompile_name(std::string s) // NOLINT
    });
 }
-template <class... Ms>
-auto conv_bias_pointwise(Ms... ms)
-{
-    return precompile_name("pointwise")(
-        match::either_arg(0, 1)(bias_shape(match::used_once()).bind("bias"),
-                                fusable_conv(match::used_once()).bind("conv")),
-        ms...);
-}
 struct find_conv_bias
 {
    context* ctx = nullptr;
@@ -1013,6 +1005,7 @@ struct find_commutative_broadcast
        m.replace_instruction(ins, ins->get_operator(), args);
    }
 };
+} // namespace
 struct find_contiguous
 {

--- a/src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/code_object_op.hpp
@@ -38,12 +38,13 @@ struct context;
 struct code_object_op
 {
-    value::binary code_object;
+    value::binary code_object{};
-    std::string symbol_name;
+    std::string symbol_name = "";
-    std::size_t global;
+    std::size_t global      = 0;
-    std::size_t local;
+    std::size_t local       = 0;
-    std::vector<shape> expected_inputs;
+    std::vector<shape> expected_inputs{};
-    shape output;
+    shape output{};
+    std::int64_t output_arg = -1;
    kernel k{};
    template <class Self, class F>
@@ -66,9 +67,13 @@ struct code_object_op
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
    void finalize(context&, const shape&, const std::vector<shape>&);
+    // Resolves output_arg to an index into an argument list of size n.
+    // A negative output_arg counts back from the end (the default of -1
+    // selects the last argument); a non-negative value is returned unchanged.
+    std::int64_t get_output_arg(std::size_t n) const
+    {
+        // Convert n to signed before adding so the sum is not evaluated in
+        // unsigned arithmetic and then implicitly converted back to signed.
+        return output_arg < 0 ? static_cast<std::int64_t>(n) + output_arg : output_arg;
+    }
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {
-        return shapes.size() - 1;
+        // The aliased input is now chosen through output_arg (via
+        // get_output_arg) instead of always being the last entry of shapes.
+        return get_output_arg(shapes.size());
    }
    friend std::ostream& operator<<(std::ostream& os, const code_object_op& op)

--- a/src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/deconvolution.hpp
@@ -39,20 +39,20 @@ struct miopen_deconvolution
    op::deconvolution op;
    shared<convolution_descriptor> cd;
    miopenConvFwdAlgorithm_t algo{};
-    miopenHandle_t handle = nullptr;
+    uint64_t solution_id = 0;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        // TODO: Add algo
+        return pack_join(op::deconvolution::reflect(self.op, f),
-        return op::convolution::reflect(self.op, f);
+                         pack(f(self.solution_id, "solution_id")));
    }
    std::string name() const { return "gpu::deconv"; }
    shape compute_shape(const std::vector<shape>& inputs) const;
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const;
-    shape compile(context& ctx, const shape& output_shape, std::vector<shape> inputs);
+    shape find(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    void finalize(context& ctx, const shape& output_shape, std::vector<shape> inputs);
    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
    {

--- a/src/targets/gpu/include/migraphx/gpu/device_name.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device_name.hpp
@@ -33,6 +33,8 @@ namespace gpu {
 std::string get_device_name();
+int get_device_id();
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/targets/gpu/include/migraphx/gpu/mlir_conv.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/mlir_conv.hpp
@@ -21,8 +21,8 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_RTGLIB_MIOPEN_MLIR_CONV_HPP
+#ifndef MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
-#define MIGRAPHX_GUARD_RTGLIB_MIOPEN_MLIR_CONV_HPP
+#define MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
 #include <migraphx/config.hpp>
 #include <migraphx/gpu/context.hpp>
@@ -30,18 +30,19 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
-struct module;
+struct module_pass_manager;
 namespace gpu {
-struct mlir_conv
+// Pass object named "gpu::fuse_mlir". apply() is only declared here; it takes
+// a module_pass_manager rather than the bare module used by the struct it
+// replaces.
+struct fuse_mlir
 {
-    context* ctx;
+    // Defaults to null so a default-constructed pass holds no dangling pointer.
+    context* ctx = nullptr;
-    std::string name() const { return "mlir::convolution"; }
+    std::string name() const { return "gpu::fuse_mlir"; }
-    void apply(module& m) const;
+    void apply(module_pass_manager& mpm) const;
 };
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
+#endif // MIGRAPHX_GUARD_GPU_FUSE_MLIR_HPP
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
 #define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
+#include <migraphx/argument.hpp>
 #include <migraphx/op/quant_dot.hpp>
 #include <migraphx/config.hpp>
 #include <utility>