Merge branch 'dyn_ref_multibroadcast' of...

Merge branch 'dyn_ref_multibroadcast' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_transpose

Merge branch 'dyn_ref_multibroadcast' of...
Merge branch 'dyn_ref_multibroadcast' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_transpose
a9815bf4 · charlie · b80e2db1 · 2fa68ded · a9815bf4 · a9815bf4
Commit a9815bf4 authored Nov 16, 2022 by charlie
20 changed files
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -7,7 +7,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@0.6.0
+        uses: styfle/cancel-workflow-action@0.11.0
        with:
          access_token: ${{ github.token }}
  tidy:
@@ -15,9 +15,19 @@ jobs:
    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
+      run: |
+        sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku 
-    - uses: actions/checkout@v2
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local/lib
+        echo $(pwd)
+        du . --max-depth=1 -h
+        ls -la
+    - uses: actions/checkout@v3
    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
@@ -34,7 +44,7 @@ jobs:
        message("::set-output name=timestamp::${current_date}")
    - name: Cache files for tidy
-      uses: pat-s/always-upload-cache@v2.1.3
+      uses: pat-s/always-upload-cache@v3.0.11
      with:
        path: tidy-cache
        key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
@@ -65,8 +75,8 @@ jobs:
    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
@@ -110,8 +120,8 @@ jobs:
    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku 
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
@@ -146,10 +156,10 @@ jobs:
    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - name: Install pyflakes
@@ -167,10 +177,10 @@ jobs:
    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - name: run License Check
@@ -198,16 +208,16 @@ jobs:
    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws*  /usr/local/lib/heroku
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.7
    - name: Cache dependencies
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
      with:
        # This path is specific to Ubuntu
        path: ${{ github.workspace }}/cget
@@ -294,16 +304,16 @@ jobs:
    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.7
    - name: Cache dependencies
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
      with:
        # This path is specific to Ubuntu
        path: ${{ github.workspace }}/cget

--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -55,6 +55,7 @@ add_library(migraphx
    insert_pad.cpp
    instruction.cpp
    json.cpp
+    layout_nhwc.cpp
    load_save.cpp
    make_op.cpp
    module.cpp
@@ -144,6 +145,7 @@ register_migraphx_ops(
    if_op
    im2col
    isnan
+    layout
    leaky_relu
    less
    load

--- a/src/auto_contiguous.cpp
+++ b/src/auto_contiguous.cpp
@@ -59,6 +59,8 @@ void auto_contiguous::apply(module& m) const
    auto last = std::prev(m.end());
    for(auto ins : iterator_for(m))
    {
+        if(ins->name() == "layout")
+            continue;
        // for last instruction that is NOT a return
        if(ins->outputs().empty() and ins != last)
            continue;

--- a/src/common.cpp
+++ b/src/common.cpp
@@ -68,13 +68,9 @@ std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
 std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1)
 {
-    assert(s0.dynamic() or s1.dynamic());
    // change both shapes to dynamic_dimension representation
-    if(not s0.dynamic())
+    s0 = s0.to_dynamic();
-        s0 = s0.to_dynamic();
+    s1 = s1.to_dynamic();
-    if(not s1.dynamic())
-        s1 = s1.to_dynamic();
    if(s0.ndim() > s1.ndim())
    {
        std::swap(s0, s1);

--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -56,6 +56,8 @@ static void create_pointwise_modules(module_pass_manager& mpm)
    {
        if(not ins->get_operator().attributes().get("pointwise", false))
            continue;
+        if(ins->get_operator().name() == "layout")
+            continue;
        assert(ins->get_operator().attributes().contains("point_op"));
        auto* pm = mpm.create_module(mpm.get_module().name() + ":pointwise" + std::to_string(n++));
        pm->set_bypass();

--- a/src/include/migraphx/check_shapes.hpp
+++ b/src/include/migraphx/check_shapes.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CHECK_SHAPES_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CHECK_SHAPES_HPP
+#include <migraphx/permutation.hpp>
 #include <migraphx/shape.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/stringutils.hpp>
@@ -232,6 +233,19 @@ struct check_shapes
        return *this;
    }
+    /*!
+     * Require every shape to be packed and to have a permutation that is
+     * one of the entries in `layouts`; throws otherwise.
+     */
+    const check_shapes&
+    packed_layouts(const std::initializer_list<std::vector<int64_t>>& layouts) const
+    {
+        // A shape qualifies only when it is packed and its permutation is listed.
+        const auto has_allowed_layout = [&](const shape& s) {
+            if(not s.packed())
+                return false;
+            return contains(layouts, find_permutation(s));
+        };
+        if(not this->all_of(has_allowed_layout))
+            MIGRAPHX_THROW(prefix() + "Shapes are not packed with correct layout");
+        return *this;
+    }
    /*!
     * Check all shapes are packed or broadcasted.
     */

--- a/src/include/migraphx/layout_nhwc.hpp
+++ b/src/include/migraphx/layout_nhwc.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
+#define MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
+#include <string>
+#include <migraphx/instruction_ref.hpp>
+#include <migraphx/config.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+struct module_pass_manager;
+/**
+ * Transform convolutions to nhwc
+ *
+ * Module pass. Its apply() is implemented in layout_nhwc.cpp, where it
+ * routes the inputs of 4-D, non-grouped convolutions through a `layout`
+ * operator with permutation {0, 2, 3, 1} and then runs
+ * dead_code_elimination and eliminate_contiguous on the module.
+ */
+struct layout_nhwc
+{
+    std::string name() const { return "layout_nhwc"; } // identifier reported for this pass
+    void apply(module_pass_manager& mpm) const; // declared here, defined out of line
+};
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
--- a/src/include/migraphx/op/broadcast.hpp
+++ b/src/include/migraphx/op/broadcast.hpp
@@ -142,7 +142,6 @@ struct broadcast
    {
        return args[0].reshape(dyn_out.computed_shape);
    }
    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };

--- a/src/include/migraphx/op/layout.hpp
+++ b/src/include/migraphx/op/layout.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_OP_LAYOUT_HPP
+#define MIGRAPHX_GUARD_OP_LAYOUT_HPP
+#include <migraphx/config.hpp>
+#include <array>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/streamutils.hpp>
+#include <migraphx/literal.hpp>
+#include <migraphx/op/unary.hpp>
+#include <cmath>
+#include <utility>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+/**
+ * Unary operator whose element-wise function is the identity; the output
+ * keeps the input's type and lens but takes the shape produced by
+ * shape::from_permutation for `permutation`.
+ */
+struct layout : unary<layout>
+{
+    // Permutation handed to shape::from_permutation when computing the output shape.
+    std::vector<int64_t> permutation;
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.permutation, "permutation"));
+    }
+    // Accepts exactly one input whose number of dimensions equals permutation.size().
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this}.has(1).only_dims(permutation.size());
+        const shape& input = inputs.at(0);
+        return shape::from_permutation(input.type(), input.lens(), permutation);
+    }
+    // Element-wise function: returns each value unchanged.
+    auto apply() const
+    {
+        return [](auto value) { return value; };
+    }
+};
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_OP_LAYOUT_HPP
--- a/src/instruction.cpp
+++ b/src/instruction.cpp
--- a/src/layout_nhwc.cpp
+++ b/src/layout_nhwc.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/layout_nhwc.hpp>
+#include <migraphx/module.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/permutation.hpp>
+#include <migraphx/functional.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/eliminate_contiguous.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/pass_manager.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+/**
+ * Starting at the last instruction of `m`, follow inputs backwards and
+ * collect the first instruction on each path for which `pred` is true.
+ * The search does not continue past a matching instruction.
+ *
+ * Each instruction is explored at most once, so an instruction reachable
+ * through several input paths is neither re-traversed nor reported twice.
+ *
+ * @param m    module to search; an empty module yields an empty result
+ * @param pred callable taking an instruction_ref and returning bool
+ * @return matching instructions in depth-first discovery order
+ */
+template <class Predicate>
+std::vector<instruction_ref> find_lasts(const module& m, Predicate pred)
+{
+    std::vector<instruction_ref> result;
+    // An empty module has no last instruction; std::prev(m.end()) would be invalid.
+    if(m.begin() == m.end())
+        return result;
+    // Instructions already explored; shared inputs would otherwise be walked
+    // once per path, which is exponential on diamond-shaped graphs.
+    std::unordered_set<instruction_ref> visited;
+    fix([&](auto self, auto ins) {
+        if(not visited.insert(ins).second)
+            return;
+        if(pred(ins))
+        {
+            result.push_back(ins);
+            return;
+        }
+        for(auto input : ins->inputs())
+            self(input);
+    })(std::prev(m.end()));
+    return result;
+}
+/**
+ * For every 4-D convolution returned by find_lasts, insert a `layout`
+ * operator right after it that carries the convolution's current
+ * permutation, and redirect the convolution's users to that operator.
+ *
+ * @return the instructions returned by replace_instruction for each
+ *         convolution handled
+ */
+std::unordered_set<instruction_ref> preserve_output_layout(module& m)
+{
+    const auto is_conv_4d = [](auto ins) {
+        return ins->name() == "convolution" and ins->get_shape().lens().size() == 4;
+    };
+    std::unordered_set<instruction_ref> pinned;
+    for(auto conv : find_lasts(m, is_conv_4d))
+    {
+        auto perm = find_permutation(conv->get_shape());
+        auto keep = m.insert_instruction(
+            std::next(conv), make_op("layout", {{"permutation", perm}}), conv);
+        pinned.insert(m.replace_instruction(conv, keep));
+    }
+    return pinned;
+}
+/**
+ * Rebuild every 4-D convolution with group <= 1 so that each of its inputs
+ * first passes through a `layout` operator with permutation {0, 2, 3, 1}.
+ * The rebuilt convolution is followed by a `contiguous` operator, which
+ * replaces the original instruction.
+ */
+void transform_convolutions(module& m)
+{
+    for(auto ins : iterator_for(m))
+    {
+        const bool is_conv_4d =
+            ins->name() == "convolution" and ins->get_shape().lens().size() == 4;
+        if(not is_conv_4d)
+            continue;
+        // Grouped convolutions are left untouched.
+        auto attrs = ins->get_operator().to_value();
+        if(attrs.at("group").to<int>() > 1)
+            continue;
+        // Work on a copy of the argument list while new instructions are inserted.
+        const auto original_inputs = ins->inputs();
+        std::vector<instruction_ref> nhwc_inputs;
+        nhwc_inputs.reserve(original_inputs.size());
+        for(const auto& input : original_inputs)
+        {
+            nhwc_inputs.push_back(m.insert_instruction(
+                ins, make_op("layout", {{"permutation", {0, 2, 3, 1}}}), input));
+        }
+        auto nhwc_conv = m.insert_instruction(ins, ins->get_operator(), nhwc_inputs);
+        auto packed    = m.insert_instruction(ins, make_op("contiguous"), nhwc_conv);
+        m.replace_instruction(ins, packed);
+    }
+}
+/**
+ * Replace each `layout` instruction by its input when the two already have
+ * the same shape, except for the instructions listed in `output_layouts`.
+ */
+void remove_layout(module& m, const std::unordered_set<instruction_ref>& output_layouts)
+{
+    for(auto ins : iterator_for(m))
+    {
+        // Short-circuiting keeps inputs().front() from being read on non-layout instructions.
+        const bool removable = ins->name() == "layout" and
+                               ins->get_shape() == ins->inputs().front()->get_shape() and
+                               not contains(output_layouts, ins);
+        if(removable)
+            m.replace_instruction(ins, ins->inputs().front());
+    }
+}
+/**
+ * Run the pass: pin the layout of the trailing convolutions, rewrite the
+ * convolutions, clean up with dead_code_elimination / eliminate_contiguous,
+ * then drop the `layout` operators that turned out to be no-ops.
+ */
+void layout_nhwc::apply(module_pass_manager& mpm) const
+{
+    module& m = mpm.get_module();
+    // Layout instructions recorded here are exempt from remove_layout below.
+    const std::unordered_set<instruction_ref> output_layouts = preserve_output_layout(m);
+    transform_convolutions(m);
+    mpm.run_pass(dead_code_elimination{});
+    mpm.run_pass(eliminate_contiguous{"contiguous"});
+    mpm.run_pass(dead_code_elimination{});
+    remove_layout(m, output_layouts);
+    mpm.run_pass(dead_code_elimination{});
+}
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_split.cpp
+++ b/src/onnx/parse_split.cpp
@@ -26,6 +26,9 @@
 #include <migraphx/ranges.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/tune_axis.hpp>
+#include <migraphx/onnx/checks.hpp>
+#include <migraphx/stringutils.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -55,12 +58,12 @@ struct parse_split : op_parser<parse_split>
        {
            literal s = parser.parse_value(info.attributes.at("split"));
            s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
+        }
-            if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
+        else if(args.size() == 2)
-               static_cast<int64_t>(lens[tuned_axis]))
+        {
-            {
+            auto s = args[1]->eval();
-                MIGRAPHX_THROW("PARSE_SPLIT: sum of split attribute unequal to dim size of axis!");
+            check_arg_empty(s, "Split: dynamic shape is not supported");
-            }
+            s.visit([&](auto v) { vec_splits.assign(v.begin(), v.end()); });
        }
        // no split attribute, input is equally divided
        else
@@ -74,6 +77,15 @@ struct parse_split : op_parser<parse_split>
            vec_splits.resize(info.num_outputs, dl);
        }
+        if(std::accumulate(vec_splits.begin(), vec_splits.end(), int64_t(0)) !=
+           static_cast<int64_t>(lens[tuned_axis]))
+        {
+            MIGRAPHX_THROW(
+                "PARSE_SPLIT: sum of split attribute unequal to dim size of axis! tuned axis:" +
+                std::to_string(lens[tuned_axis]) + " Output " + to_string_range(vec_splits) +
+                " Rank " + std::to_string(n_rank) + " Len outs " + to_string_range(lens));
+        }
        std::vector<instruction_ref> ret_ins;
        int64_t start = 0;
        for(auto sl : vec_splits)

--- a/src/targets/cpu/binary.cpp
+++ b/src/targets/cpu/binary.cpp
@@ -51,7 +51,18 @@ struct dnnl_binary : dnnl_op<dnnl_binary, dnnl::binary>
        auto r  = s0;
        if(s0 != s1 or not s0.packed())
        {
-            r = shape{s0.type(), s0.lens()};
+            if(s0.packed() != s1.packed())
+            {
+                r = s0.packed() ? s0 : s1;
+            }
+            else if(s0.broadcasted() != s1.broadcasted())
+            {
+                r = s0.broadcasted() ? s1.with_lens(s0.lens()) : s0.with_lens(s0.lens());
+            }
+            else
+            {
+                r = {s0.type(), s0.lens()};
+            }
        }
        // Call to get_primitive to make sure an algo is available
        this->get_primitive(this->to_memory_desc(r, inputs));

--- a/src/targets/cpu/convolution.cpp
+++ b/src/targets/cpu/convolution.cpp
@@ -43,9 +43,9 @@ struct dnnl_convolution
        return {MIGRAPHX_DNNL_PREFIX(ARG_SRC), MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)};
    }
-    shape adjust_shape(const shape& x, int i) const
+    shape adjust_shape(const shape& x, int i, const shape& output) const
    {
-        auto s = base_adjust_shape(x);
+        auto s = base_adjust_shape(x, output);
        if(i == 1 and op.group > 1)
        {
            // TODO: Add support for transposed weights

--- a/src/targets/cpu/deconvolution.cpp
+++ b/src/targets/cpu/deconvolution.cpp
@@ -37,9 +37,9 @@ struct dnnl_deconvolution
        return {MIGRAPHX_DNNL_PREFIX(ARG_SRC), MIGRAPHX_DNNL_PREFIX(ARG_WEIGHTS)};
    }
-    shape adjust_shape(const shape& x, int i) const
+    shape adjust_shape(const shape& x, int i, const shape& output) const
    {
-        auto s = base_adjust_shape(x);
+        auto s = base_adjust_shape(x, output);
        if(i == 1)
        {
            // The input and output channels are flipped for dnnl

--- a/src/targets/cpu/include/migraphx/cpu/dnnl.hpp
+++ b/src/targets/cpu/include/migraphx/cpu/dnnl.hpp
@@ -167,7 +167,7 @@ struct dnnl_op : auto_register_op<Derived>
        std::iota(result.begin(), result.end(), MIGRAPHX_DNNL_PREFIX(ARG_SRC_0));
        return result;
    }
-    shape base_adjust_shape(const shape& s) const
+    shape base_adjust_shape(const shape& s, const shape& output) const
    {
        if(s.broadcasted())
        {
@@ -183,7 +183,8 @@ struct dnnl_op : auto_register_op<Derived>
                               else
                                   return len;
                           });
-            return shape{s.type(), lens};
+            // Use the permutation of the output
+            return output.with_lens(s.type(), lens);
        }
        return s;
    }
@@ -204,7 +205,10 @@ struct dnnl_op : auto_register_op<Derived>
            i++;
        }
    }
-    shape adjust_shape(const shape& s, int) const { return base_adjust_shape(s); }
+    shape adjust_shape(const shape& s, int, const shape& output) const
+    {
+        return base_adjust_shape(s, output);
+    }
    std::vector<int> create_arg_map(std::size_t input_size) const
    {
        const auto& self     = static_cast<const Derived&>(*this);
@@ -224,12 +228,12 @@ struct dnnl_op : auto_register_op<Derived>
        const auto& self = static_cast<const Derived&>(*this);
        std::unordered_map<int, dnnl::memory::desc> result;
        result[MIGRAPHX_DNNL_PREFIX(ARG_DST)] =
-            to_dnnl_memory_desc(self.adjust_shape(output_shape, inputs.size()));
+            to_dnnl_memory_desc(self.adjust_shape(output_shape, inputs.size(), output_shape));
        auto m = create_arg_map(inputs.size());
        assert(m.size() >= inputs.size());
        for(int i = 0; i < inputs.size(); i++)
        {
-            result[m[i]] = to_dnnl_memory_desc(self.adjust_shape(inputs[i], i));
+            result[m[i]] = to_dnnl_memory_desc(self.adjust_shape(inputs[i], i, output_shape));
        }
        return result;
    }

--- a/src/targets/cpu/reorder.cpp
+++ b/src/targets/cpu/reorder.cpp
@@ -32,7 +32,7 @@ struct dnnl_reorder : dnnl_op<dnnl_reorder, dnnl::reorder>
 {
    std::string name() const { return "dnnl::reorder"; }
-    shape adjust_shape(const shape& x, int) const { return x; }
+    shape adjust_shape(const shape& x, int, const shape&) const { return x; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {

--- a/src/targets/cpu/target.cpp
+++ b/src/targets/cpu/target.cpp
@@ -33,6 +33,7 @@
 #include <migraphx/eliminate_data_type.hpp>
 #include <migraphx/eliminate_identity.hpp>
 #include <migraphx/eliminate_pad.hpp>
+#include <migraphx/layout_nhwc.hpp>
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/register_target.hpp>
@@ -82,6 +83,9 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
            dead_code_elimination{},
            simplify_algebra{},
            simplify_reshapes{},
+            layout_nhwc{},
+            dead_code_elimination{},
+            simplify_reshapes{},
            simplify_algebra{},
            auto_contiguous{},
            simplify_reshapes{},

--- a/src/targets/gpu/CMakeLists.txt
+++ b/src/targets/gpu/CMakeLists.txt
@@ -83,6 +83,7 @@ add_library(migraphx_gpu
    compile_gen.cpp
    compile_hip.cpp
    compile_hip_code_object.cpp
+    compile_miopen.cpp
    compiler.cpp
    device_name.cpp
    fuse_mlir.cpp

--- a/src/targets/gpu/compile_miopen.cpp
+++ b/src/targets/gpu/compile_miopen.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/gpu/compile_miopen.hpp>
+#include <migraphx/gpu/context.hpp>
+#include <migraphx/module.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/register_op.hpp>
+#include <migraphx/op/identity.hpp>
+#include <migraphx/gpu/rocblas.hpp>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace gpu {
+/**
+ * Placeholder operator "gpu::miopen_op" that wraps another operation until
+ * compile_miopen::apply replaces it with the wrapped operation plus an
+ * extra workspace argument.
+ */
+struct miopen_op
+{
+    // Wrapped operation; defaults to identity.
+    operation op = op::identity{};
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.op, "op"));
+    }
+    std::string name() const { return "gpu::miopen_op"; }
+    /**
+     * Compute the shape by forwarding to the wrapped operation with the last
+     * input shape repeated once, so the wrapped operation sees one more
+     * argument than this placeholder has.
+     * Throws when `inputs` is empty instead of reading inputs.back() on an
+     * empty vector.
+     */
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        if(inputs.empty())
+            MIGRAPHX_THROW("gpu::miopen_op: expected at least one input");
+        inputs.push_back(inputs.back());
+        return op.compute_shape(inputs);
+    }
+    // The output aliases the last argument.
+    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
+    {
+        return shapes.size() - 1;
+    }
+};
+MIGRAPHX_REGISTER_OP(miopen_op);
+/**
+ * Set the "int8_x4_format" attribute on `op`, compile it for the shapes of
+ * `ins`, and return the "workspace" entry of the compile result (0 when the
+ * entry is absent).
+ */
+std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool format) const
+{
+    op.from_value({{"int8_x4_format", format}});
+    auto input_shapes = to_shapes(ins->inputs());
+    auto compiled     = op.compile(*ctx, ins->get_shape(), input_shapes);
+    return compiled.get<std::size_t>("workspace", 0);
+}
+/**
+ * Replace every "gpu::miopen_op" instruction in `m` by its wrapped
+ * operation, compiled for the instruction's shapes, with an int8 workspace
+ * allocation inserted just before the last argument.
+ */
+void compile_miopen::apply(module& m) const
+{
+    assert(ctx);
+    const bool int8_x4_format = get_int8_x4_format(any_cast<migraphx::gpu::context>(*ctx));
+    for(auto ins : iterator_for(m))
+    {
+        if(ins->name() != "gpu::miopen_op")
+            continue;
+        auto wrapped_op             = any_cast<miopen_op>(ins->get_operator()).op;
+        std::size_t workspace_bytes = 0;
+        try
+        {
+            // Regular convolution and deconvolution always compile on this first attempt.
+            workspace_bytes = compile(wrapped_op, ins, int8_x4_format);
+        }
+        catch(migraphx::exception&)
+        {
+            // No solver accepted the default format: retry with the opposite one.
+            workspace_bytes = compile(wrapped_op, ins, not int8_x4_format);
+        }
+        auto args = ins->inputs();
+        const shape workspace_shape{shape::int8_type, {workspace_bytes}};
+        auto workspace = m.insert_instruction(
+            ins, make_op("allocate", {{"shape", to_value(workspace_shape)}}));
+        // The workspace goes in front of the final argument.
+        args.insert(std::prev(args.end()), workspace);
+        m.replace_instruction(ins, wrapped_op, args);
+    }
+}
+} // namespace gpu
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx