Unverified commit eba1e778 authored by Umang Yadav, committed by GitHub

Merge branch 'develop' into perk-kernel

parents 6ee87f92 5bf4dee6
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
#define MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
#include <migraphx/program.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace fpga {
struct subgraph
{
std::string name() const { return "fpga::subgraph"; }
void apply(module_pass_manager& mpm) const;
};
} // namespace fpga
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_FPGA_SUBGRAPH_HPP
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_FPGA_TARGET_HPP
#define MIGRAPHX_GUARD_FPGA_TARGET_HPP
#include <migraphx/program.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/compile_options.hpp>
#include <migraphx/fpga/context.hpp>
#include <migraphx/config.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct pass;
namespace fpga {
struct target
{
std::string name() const;
std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;
migraphx::context get_context() const { return context{}; }
float is_supported(instruction_ref ins, support_metric m);
argument copy_to(const argument& arg) const { return arg; }
argument copy_from(const argument& arg) const { return arg; }
argument allocate(const shape& s) const;
};
MIGRAPHX_REGISTER_TARGET(target);
} // namespace fpga
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_FPGA_TARGET_HPP
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
#define MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
#include <string>
#include <migraphx/instruction.hpp>
#include <migraphx/pass_manager.hpp>
namespace vitis_ai {
class x_model
{
migraphx::shape shape;
public:
migraphx::shape get_shape() const;
void set_shape(migraphx::shape);
};
x_model create_xmodel(migraphx::module_ref mod);
migraphx::argument execute(const x_model& xmodel,
const migraphx::shape& output_shape,
std::vector<migraphx::argument>& args);
} // namespace vitis_ai
#endif // MIGRAPHX_GUARD_FPGA_VITIS_AI_ADAPTER_HPP
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/fpga/lowering.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/stringutils.hpp>
#include <iostream>
#include "migraphx/fpga/vitis_ai_adapter.hpp"
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace fpga {
struct fpga_vitis_op
{
fpga_vitis_op() = default;
explicit fpga_vitis_op(vitis_ai::x_model model) : xmodel(std::move(model)){};
vitis_ai::x_model xmodel;
int dummy = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
// return pack(f(self.xmodel, "xmodel"));
return pack(f(self.dummy, "dummy"));
}
std::string name() const { return "fpga::vitis_ai"; }
shape compute_shape(const std::vector<shape>& inputs) const
{
(void)inputs;
return xmodel.get_shape();
}
argument
compute(const context& ctx, const shape& output_shape, std::vector<argument> args) const
{
std::cout << "The context is " << ctx.id << std::endl;
return ::vitis_ai::execute(xmodel, output_shape, args);
}
};
MIGRAPHX_REGISTER_OP(fpga_vitis_op)
void lowering::apply(module& m) const
{
auto* mod = &m;
// test modifying the context from a pass
ctx->id = 2;
for(auto it : iterator_for(*mod))
{
if(it->name() == "fpga::vitis_placeholder")
{
assert(it->module_inputs().size() == 1);
auto xmodel = ::vitis_ai::create_xmodel(it->module_inputs()[0]);
mod->replace_instruction(it, fpga_vitis_op{xmodel}, it->inputs());
}
}
}
} // namespace fpga
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/fpga/subgraph.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/module.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/pass_manager.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace fpga {
struct fpga_placeholder_op
{
fpga_placeholder_op() = default;
int dummy = 0;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.dummy, "dummy"));
}
std::string name() const { return "fpga::vitis_placeholder"; }
shape compute_shape(const std::vector<shape>& inputs, std::vector<module_ref> mods) const
{
(void)inputs;
if(mods.size() != 1)
{
MIGRAPHX_THROW("should have one submodule.");
}
module_ref sm = mods.front();
if(sm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("fpga::vitis_placeholder: submodule must have exactly one output shape");
return sm->get_output_shapes().front();
}
};
MIGRAPHX_REGISTER_OP(fpga_placeholder_op)
bool is_fpga_instr(migraphx::instruction_ref it)
{
// assuming all instructions that aren't @param, @literal, or input data are fpga instrs
if(migraphx::starts_with(it->name(), "@"))
{
return false;
}
// no inputs to the instr means it's input data
if(it->inputs().empty())
{
return false;
}
return true;
}
void subgraph::apply(module_pass_manager& mpm) const
{
auto& mod = mpm.get_module();
auto* pm = mpm.create_module(mod.name() + ":fpga");
pm->set_bypass();
migraphx::instruction_ref first = mod.end();
migraphx::instruction_ref last;
std::vector<migraphx::instruction_ref> literal_inputs;
for(auto it : iterator_for(mod))
{
// assuming we want all the params/literals as inputs to the FPGA submodule
if(migraphx::starts_with(it->name(), "@param") ||
migraphx::starts_with(it->name(), "@literal"))
{
literal_inputs.push_back(it);
}
if(is_fpga_instr(it))
{
if(first == mod.end())
{
first = it;
}
last = it;
}
}
// TODO(varunsh): this code may be replaceable by code in the fuse_pointwise pass
// assuming all FPGA instructions are in one contiguous range
pm->insert_instructions(pm->end(), first, last, {});
migraphx::instruction_ref placeholder_ins;
for(auto it : iterator_for(mod))
{
if(migraphx::starts_with(it->name(), "@return"))
{
placeholder_ins = mod.insert_instruction(
it, migraphx::make_op("fpga::vitis_placeholder"), literal_inputs, {pm});
break;
}
}
mod.replace_return({placeholder_ins});
}
} // namespace fpga
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
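For reference, a hypothetical standalone driver (not part of this commit; the program, shapes, and names are made up) showing how the subgraph pass above can be exercised through the generic pass machinery. After the pass runs, an fpga::vitis_placeholder instruction carrying the new submodule should replace the return value of the main module.

#include <migraphx/fpga/subgraph.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/module.hpp>
#include <migraphx/pass.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/program.hpp>
#include <iostream>

int main()
{
    // Build a tiny program with two pointwise instructions feeding the return.
    migraphx::program p;
    auto* mm = p.get_main_module();
    migraphx::shape s{migraphx::shape::float_type, {4}};
    auto x    = mm->add_parameter("x", s);
    auto y    = mm->add_parameter("y", s);
    auto sum  = mm->add_instruction(migraphx::make_op("add"), x, y);
    auto prod = mm->add_instruction(migraphx::make_op("mul"), sum, y);
    mm->add_return({prod});
    // Run only the subgraph pass and print the rewritten program.
    migraphx::run_passes(p, {migraphx::fpga::subgraph{}});
    std::cout << p << std::endl;
}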
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/fpga/target.hpp>
#include <migraphx/fpga/lowering.hpp>
#include <migraphx/fpga/subgraph.hpp>
#include <migraphx/register_target.hpp>
#include <migraphx/pass.hpp>
#include <migraphx/auto_contiguous.hpp>
#include <migraphx/rewrite_rnn.hpp>
#include <migraphx/eliminate_pad.hpp>
#include <migraphx/insert_pad.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/generate.hpp>
#include <migraphx/normalize_ops.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace fpga {
std::string target::name() const { return "fpga"; }
std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_options&) const
{
// not sure if all these passes are needed but they were copied from ref/
auto& ctx = any_cast<context>(gctx);
return {normalize_ops{},
eliminate_pad{},
dead_code_elimination{},
insert_pad{},
dead_code_elimination{},
rewrite_rnn{},
dead_code_elimination{},
auto_contiguous{},
dead_code_elimination{},
subgraph{},
dead_code_elimination{},
lowering{&ctx},
dead_code_elimination{}};
}
argument target::allocate(const shape& s) const { return fill_argument(s, 0); }
float target::is_supported(instruction_ref ins, support_metric m)
{
// for now, not using the ins and metric to return a value
(void)ins;
(void)m;
return 1.0f;
}
MIGRAPHX_REGISTER_TARGET(target);
} // namespace fpga
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
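As a usage sketch (assumed, not part of this commit): once MIGRAPHX_REGISTER_TARGET(target) has run, the fpga target is reachable by name through the generic MIGraphX API.

#include <migraphx/program.hpp>
#include <migraphx/register_target.hpp>

// Hypothetical helper: compile an existing program with the registered fpga target.
migraphx::program compile_for_fpga(migraphx::program p)
{
    auto t = migraphx::make_target("fpga"); // looks up fpga::target by its name()
    p.compile(t);                           // runs the passes returned by target::get_passes
    return p;
}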
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "migraphx/fpga/vitis_ai_adapter.hpp"
#include "migraphx/module.hpp"
#include "migraphx/stringutils.hpp"
namespace vitis_ai {
migraphx::shape x_model::get_shape() const { return shape; };
void x_model::set_shape(migraphx::shape s) { shape = s; }
x_model create_xmodel(const migraphx::module_ref mod)
{
std::cout << "Calling an external function: create_xmodel!\n";
x_model xmodel;
xmodel.set_shape(mod->get_output_shapes().front());
return xmodel;
}
migraphx::argument execute(const x_model& xmodel,
const migraphx::shape& output_shape,
std::vector<migraphx::argument>& args)
{
(void)xmodel;
std::cout << "Calling an external function: execute!\n";
std::cout << "Output Shape: " << output_shape << std::endl;
std::cout << "Args: " << args.size() << std::endl;
for(const auto& arg : args)
{
std::cout << " " << arg.get_shape() << std::endl;
}
std::cout << std::endl;
migraphx::argument result{output_shape};
return result;
}
} // namespace vitis_ai
@@ -184,6 +184,7 @@ add_library(migraphx_gpu
pack_int8_args.cpp
prefuse_ops.cpp
pad.cpp
perfdb.cpp
pooling.cpp
quant_convolution.cpp
reverse.cpp
@@ -43,6 +43,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DEBUG_SYM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_OPTIMIZE);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_ASM);
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_DUMP_SRC);
@@ -227,6 +228,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
if(params.find("-std=") == std::string::npos)
params += " --std=c++17";
params += " -fno-gpu-rdc";
if(enabled(MIGRAPHX_GPU_DEBUG_SYM{}))
params += " -g";
params += " -c";
if(is_hcc_compiler())
{
@@ -51,9 +51,9 @@ static const char* const make_tensor_template = R"__migraphx__(
template<>
struct make_tensor<${n}>
{
static __device__ auto apply(void* p)
static __device__ auto apply(void* __restrict__ p)
{
return make_tensor_view(reinterpret_cast<${type}*>(p), make_shape(${lens}, ${strides}));
return make_tensor_view(reinterpret_cast<${type}* __restrict__>(p), make_shape(${lens}, ${strides}));
}
};
)__migraphx__";
@@ -23,13 +23,13 @@
*/
#include <migraphx/gpu/hip.hpp>
#include <migraphx/manage_ptr.hpp>
#include <migraphx/register_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device/contiguous.hpp>
#include <miopen/miopen.h>
#include <memory>
#include <mutex>
#include <vector>
namespace migraphx {
@@ -77,12 +77,38 @@ void* get_device_ptr(void* hptr)
return result;
}
hip_ptr allocate_gpu(std::size_t sz, bool host = false)
struct host_ptr_cache
{
std::unordered_map<void*, std::weak_ptr<void>> cache;
std::mutex m;
std::shared_ptr<void> get(void* ptr)
{
std::lock_guard<std::mutex> lock(m);
auto it = cache.find(ptr);
if(it != cache.end())
return it->second.lock();
return nullptr;
}
void put(const std::shared_ptr<void>& p)
{
std::lock_guard<std::mutex> lock(m);
cache[p.get()] = p;
}
};
static host_ptr_cache& get_host_ptr_cache()
{
static host_ptr_cache cache;
return cache;
}
std::shared_ptr<void> allocate_gpu(std::size_t sz, bool host = false)
{
if(sz > get_available_gpu_memory())
MIGRAPHX_THROW("Memory not available to allocate buffer: " + std::to_string(sz));
void* result = nullptr;
auto status = host ? hipHostMalloc(&result, sz) : hipMalloc(&result, sz);
void* alloc_ptr = nullptr;
auto status = host ? hipHostMalloc(&alloc_ptr, sz) : hipMalloc(&alloc_ptr, sz);
if(status != hipSuccess)
{
if(host)
@@ -90,16 +116,28 @@ hip_ptr allocate_gpu(std::size_t sz, bool host = false)
else
return allocate_gpu(sz, true);
}
assert(result != nullptr);
return hip_ptr{result};
assert(alloc_ptr != nullptr);
std::shared_ptr<void> result = share(hip_ptr{alloc_ptr});
if(host)
{
get_host_ptr_cache().put(result);
}
return result;
}
hip_host_ptr register_on_gpu(void* ptr, std::size_t sz)
std::shared_ptr<void> register_on_gpu(void* ptr, std::size_t sz)
{
std::shared_ptr<void> result = get_host_ptr_cache().get(ptr);
if(result)
{
return result;
}
auto status = hipHostRegister(ptr, sz, hipHostRegisterMapped);
if(status != hipSuccess)
MIGRAPHX_THROW("Gpu register failed: " + hip_error(status));
return hip_host_ptr{ptr};
result = share(hip_host_ptr{ptr});
get_host_ptr_cache().put(result);
return result;
}
template <class T>
@@ -115,7 +153,7 @@ std::vector<T> read_from_gpu(const void* x, std::size_t sz)
return result;
}
hip_ptr write_to_gpu(const void* x, std::size_t sz, bool host = false)
std::shared_ptr<void> write_to_gpu(const void* x, std::size_t sz, bool host = false)
{
gpu_sync();
auto result = allocate_gpu(sz, host);
@@ -137,22 +175,21 @@ hip_ptr write_to_gpu(const T& x)
argument allocate_gpu(const shape& s, bool host)
{
auto p = share(allocate_gpu(s.bytes() + 1, host));
auto p = allocate_gpu(s.bytes() + 1, host);
return {s, [p]() mutable { return reinterpret_cast<char*>(p.get()); }};
}
argument register_on_gpu(const argument& arg)
{
auto arg_shared = arg.share();
auto p = share(register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes()));
return {arg_shared.get_shape(), [p, a = std::move(arg_shared)]() mutable {
return get_device_ptr(p.get());
}}; // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
auto p = register_on_gpu(arg_shared.data(), arg_shared.get_shape().bytes());
return {arg_shared.get_shape(),
[p, a = std::move(arg_shared)]() mutable { return get_device_ptr(p.get()); }};
}
argument to_gpu(const argument& arg, bool host)
{
auto p = share(write_to_gpu(arg.data(), arg.get_shape().bytes(), host));
auto p = write_to_gpu(arg.data(), arg.get_shape().bytes(), host);
return {arg.get_shape(), p};
}
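A standalone sketch (simplified, with assumed names; not the MIGraphX sources) of the weak_ptr cache introduced above: repeated registrations of the same live host pointer are deduplicated, and an entry silently expires once its last shared_ptr owner is released, so hipHostRegister and hipHostUnregister run at most once per live buffer.

#include <cassert>
#include <memory>
#include <unordered_map>

// Map from raw host pointer to its (possibly expired) registration handle.
std::unordered_map<void*, std::weak_ptr<void>> cache;

std::shared_ptr<void> get_cached(void* ptr)
{
    auto it = cache.find(ptr);
    if(it == cache.end())
        return nullptr;
    return it->second.lock(); // empty if the registration has already been released
}

int main()
{
    int buffer = 0;
    {
        // First request: nothing cached, so the caller creates the handle
        // (in hip.cpp this is where hipHostRegister runs) and publishes it.
        assert(get_cached(&buffer) == nullptr);
        std::shared_ptr<void> reg(&buffer, [](void*) { /* hipHostUnregister here */ });
        cache[reg.get()] = reg;
        // Second request while the handle is alive: the same registration is reused.
        assert(get_cached(&buffer).get() == reg.get());
    }
    // Last owner gone: the weak_ptr expired, so a later request would re-register.
    assert(get_cached(&buffer) == nullptr);
    return 0;
}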
@@ -33,6 +33,8 @@ namespace gpu {
std::string get_device_name();
int get_device_id();
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
@@ -24,6 +24,7 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/op/quant_dot.hpp>
#include <migraphx/config.hpp>
#include <utility>
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_GPU_PERFDB_HPP
#define MIGRAPHX_GUARD_GPU_PERFDB_HPP
#include <migraphx/config.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/operation.hpp>
#include <string>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct problem_params
{
operation op;
std::vector<shape> inputs;
shape output;
};
std::string get_mlir_perf_for_conv(const problem_params& pp);
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_GPU_PERFDB_HPP
@@ -138,6 +138,8 @@ struct pointwise_compiler : compiler<pointwise_compiler>
g.add_point_op("less", "migraphx::abs(${0} < ${1})");
g.add_point_op("greater", "migraphx::abs(${0} > ${1})");
g.add_point_op("not", "migraphx::abs(not ${0})");
g.add_point_op("mod", "migraphx::mod(${0}, ${1})");
g.add_point_op("fmod", "migraphx::fmod(${0}, ${1})");
// Add explict conversions
g.fresult([](const shape& s) {
return "migraphx::convert<" + shape::cpp_type(s.type()) + ">";
@@ -27,6 +27,7 @@
#include <migraphx/kernels/hip.hpp>
#include <migraphx/kernels/types.hpp>
#include <migraphx/kernels/integral_constant.hpp>
#include <migraphx/kernels/type_traits.hpp>
namespace migraphx {
@@ -53,29 +54,51 @@ struct index
return blockDim.x; // NOLINT
}
#endif
template <class N, class Stride>
static constexpr auto max_stride_iterations(N n, Stride stride)
{
return (n - _c<1>) / stride + _c<1>;
}
template <class F>
__device__ void global_stride(index_int n, F f) const
template <class F, class N, class Stride>
static constexpr void for_stride(index_int start, N n, Stride stride, F f)
{
const auto stride = nglobal();
for(index_int i = global; i < n; i += stride)
if constexpr(not is_integral<N>{} and not is_integral<Stride>{} and
max_stride_iterations(n, stride) == 1)
{
f(i);
if constexpr(stride > n)
{
if(start < n)
f(start);
}
else
{
f(start);
}
}
else
{
for(index_int i = start; i < n; i += stride)
{
f(i);
}
}
}
template <class F>
__device__ void local_stride(index_int n, F f) const
template <class F, class N>
__device__ void global_stride(N n, F f) const
{
const auto stride = nlocal();
for(index_int i = local; i < n; i += stride)
{
f(i);
}
for_stride(global, n, nglobal(), f);
}
template <class F, class N>
__device__ void local_stride(N n, F f) const
{
for_stride(local, n, nlocal(), f);
}
};
inline __device__ index make_index()
inline __device__ __attribute__((const)) index make_index()
{
return index{blockIdx.x * blockDim.x + threadIdx.x, threadIdx.x, blockIdx.x}; // NOLINT
}
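A host-side sketch (simplified, with assumed types; not the kernel headers themselves) of the dispatch in for_stride above: when the trip count and stride are integral constants and the whole range fits within a single stride, the loop is dropped and only the bound check remains; otherwise the ordinary strided loop is used.

#include <cstdio>
#include <type_traits>

template <std::size_t N>
using ic = std::integral_constant<std::size_t, N>;

template <class F, class N, class Stride>
void for_stride(std::size_t start, N n, Stride stride, F f)
{
    if constexpr(not std::is_integral<N>{} and not std::is_integral<Stride>{} and
                 (N{} - 1) / Stride{} + 1 == 1)
    {
        // At most one iteration per "thread": no loop, just the bound check.
        if(start < n)
            f(start);
    }
    else
    {
        for(std::size_t i = start; i < n; i += stride)
            f(i);
    }
}

int main()
{
    // Compile-time sizes: 4 elements, 256 "threads", collapses to one guarded call.
    for_stride(3, ic<4>{}, ic<256>{}, [](std::size_t i) { std::printf("ct %zu\n", i); });
    // Runtime sizes: falls back to the ordinary strided loop.
    for_stride(0, std::size_t{10}, std::size_t{4}, [](std::size_t i) { std::printf("rt %zu\n", i); });
}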
@@ -186,7 +186,8 @@ __device__ auto auto_preload(index idx)
{
return make_transform([=](auto f, auto... xs) {
auto invoke = [=](auto... ys) {
__syncthreads();
if constexpr((Bs or ...))
__syncthreads();
f(ys...);
};
join(invoke, preload_copy<Bs>(idx, xs)...);
@@ -44,6 +44,7 @@
#include <migraphx/gpu/context.hpp>
#include <migraphx/gpu/device_name.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/gpu/perfdb.hpp>
#include <deque>
#include <variant>
@@ -145,6 +146,12 @@ std::string mlir_print(F f, T x)
return ss.str();
}
const std::unordered_set<std::string>& get_xdlops_archs()
{
static std::unordered_set<std::string> supported_archs{"gfx908", "gfx90a"};
return supported_archs;
}
struct mlir_program
{
mlir_program()
@@ -487,6 +494,17 @@ struct mlir_program
ops.add_attribute_value(get_operator_value(ins->get_operator()));
if(ins->name() != "@return")
ops.add_results({get_shape(ins)});
if(ins->name() == "convolution")
{
pp =
problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
std::string tuned = get_tune_params();
if(!tuned.empty())
ops.add_attributes({{"perf_config", tuned}});
// check if HW supports xdlops
if(contains(get_xdlops_archs(), target_name))
ops.add_attributes({{"xdlopsV2", true}});
}
std::vector<MlirValue> inputs;
transform(
@@ -508,14 +526,7 @@ struct mlir_program
// 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline(pm.get());
// 2nd pipeline to call
std::string tname = get_device_name();
// HACK: Since MLIR can't handle the full target name
auto hacked_tname = tname.substr(0, tname.find(':'));
if(tname.size() != hacked_tname.size())
std::cout
<< "*************** WARNING: MLIR may not compile the correct target features for: "
<< tname << std::endl;
mlirMIGraphXAddBackendPipeline(pm.get(), hacked_tname.c_str(), "amdgcn-amd-amdhsa", "");
mlirMIGraphXAddBackendPipeline(pm.get(), target_name.c_str(), "amdgcn-amd-amdhsa", "");
mlirPassManagerRun(pm.get(), mmodule.get());
code_object_op op{};
@@ -525,6 +536,17 @@
return op;
}
void find_target()
{
std::string tname = get_device_name();
// HACK: Since MLIR can't handle the full target name
target_name = trim(split_string(tname, ':').front());
if(tname.size() != target_name.size())
std::cout
<< "*************** WARNING: MLIR may not compile the correct target features for: "
<< tname << std::endl;
}
std::pair<std::size_t, std::size_t> get_launch_params() const
{
uint32_t attrs[2];
@@ -545,10 +567,14 @@
MIGRAPHX_THROW("Failed to compile mlir program");
}
std::string get_tune_params() { return get_mlir_perf_for_conv(pp); }
mlir_context ctx;
MlirLocation location;
mlir_module mmodule;
problem_params pp;
std::deque<std::string> strings{};
std::string target_name;
};
std::string dump_mlir(const module& m)
@@ -565,6 +591,7 @@ code_object_op compile_mlir(const context&, const module& m)
if(trace)
std::cout << m << std::endl;
mlir_program mp;
mp.find_target();
mp.parse(m);
auto mod_op = mlirModuleGetOperation(mp.mmodule.get());
if(trace)
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/value.hpp>
#include <migraphx/sqlite.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/permutation.hpp>
#include <migraphx/filesystem.hpp>
#include <fstream>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace {
std::string get_layout(const shape& s, std::string labels)
{
auto result = labels;
auto p = find_permutation(s);
std::transform(p.begin(), p.end(), result.begin(), [&](auto i) { return labels[i]; });
return "'" + result + "'";
}
std::string get_type(const shape& s)
{
static const std::unordered_map<shape::type_t, std::string> m = {
{shape::float_type, "'FP32'"},
{shape::half_type, "'FP16'"},
{shape::double_type, "'FP64'"},
{shape::int8_type, "'INT8'"},
{shape::int32_type, "'INT32'"},
};
auto it = m.find(s.type());
if(it == m.end())
return "UNKNOWN";
return it->second;
}
std::string generate_miopen_config(const problem_params& pp)
{
value v = pp.op.to_value();
auto input = pp.inputs[0].lens();
auto weights = pp.inputs[1].lens();
auto padding = v["padding"].to_vector<std::size_t>();
auto stride = v["stride"].to_vector<std::size_t>();
auto dilation = v["dilation"].to_vector<std::size_t>();
if(padding.size() != stride.size())
padding.erase(padding.begin() + padding.size() / 2, padding.end());
return to_string_range({std::string{" C.in_channels="}, to_string(input[1]),
std::string{" AND C.in_h="}, to_string(input[2]),
std::string{" AND C.in_w="}, to_string(input[3]),
std::string{" AND C.fil_h="}, to_string(weights[2]),
std::string{" AND C.fil_w="}, to_string(weights[3]),
std::string{" AND C.out_channels="}, to_string(weights[0]),
std::string{" AND C.batchsize="}, to_string(input[0]),
std::string{" AND C.pad_h="}, to_string(padding[0]),
std::string{" AND C.pad_w="}, to_string(padding[2]),
std::string{" AND C.dilation_h="}, to_string(dilation[0]),
std::string{" AND C.dilation_w="}, to_string(dilation[1]),
std::string{" AND C.conv_stride_h="}, to_string(stride[0]),
std::string{" AND C.conv_stride_w="}, to_string(stride[1]),
std::string{" AND C.layout="}, get_layout(pp.inputs[0], "NCHW"),
std::string{" AND C.data_type="}, get_type(pp.inputs[0]),
std::string{" AND C.direction="}, std::string{"'F'"}},
" ");
}
auto query_miopen_db(const std::string& query)
{
// TODO: Store db as a static variable
const auto dbpath = fs::path{"/opt"} / "rocm" / "share" / "miopen" / "db" / "miopen.db";
// Check if db file exists.
std::ifstream dbs(dbpath);
if(dbs.is_open())
{
dbs.close();
}
else
{
std::vector<std::unordered_map<std::string, std::string>> empty;
return empty;
}
auto db = sqlite::read(dbpath);
return db.execute(query);
}
} // namespace
std::string get_mlir_perf_for_conv(const problem_params& pp)
{
std::string query = "select P.* \
from perf_db P, config C \
where P.config = C.id AND \
P.solver = 'ConvMlirIgemmFwdXdlops' AND \
${config}";
auto results =
query_miopen_db(interpolate_string(query, {{"config", generate_miopen_config(pp)}}));
if(results.empty())
return "";
return results.front().at("params");
}
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
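A hypothetical standalone check of the lookup above (the shapes and attributes are made-up examples, not from this commit); the function returns an empty string when /opt/rocm/share/miopen/db/miopen.db is missing or holds no matching ConvMlirIgemmFwdXdlops entry.

#include <migraphx/gpu/perfdb.hpp>
#include <migraphx/make_op.hpp>
#include <iostream>

int main()
{
    using migraphx::shape;
    migraphx::gpu::problem_params pp;
    pp.op     = migraphx::make_op(
        "convolution", {{"padding", {1, 1}}, {"stride", {1, 1}}, {"dilation", {1, 1}}});
    pp.inputs = {shape{shape::float_type, {1, 64, 56, 56}},  // NCHW input
                 shape{shape::float_type, {64, 64, 3, 3}}};  // OIHW weights
    pp.output = shape{shape::float_type, {1, 64, 56, 56}};
    std::cout << migraphx::gpu::get_mlir_perf_for_conv(pp) << std::endl;
    return 0;
}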