Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_model_test

224b7aa0 · charlie · c4b1102e · 4420ccbd · 224b7aa0 · 224b7aa0
Commit 224b7aa0 authored Nov 30, 2022 by charlie
20 changed files
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -7,7 +7,7 @@ jobs:
    runs-on: ubuntu-latest
    steps:
      - name: Cancel Previous Runs
-        uses: styfle/cancel-workflow-action@0.6.0
+        uses: styfle/cancel-workflow-action@0.11.0
        with:
          access_token: ${{ github.token }}
  tidy:
@@ -15,9 +15,19 @@ jobs:

    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-
-    - uses: actions/checkout@v2
+      run: |
+        sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku 
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local/lib
+        echo $(pwd)
+        du . --max-depth=1 -h
+        ls -la
+
+    - uses: actions/checkout@v3

    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
@@ -34,7 +44,7 @@ jobs:
        message("::set-output name=timestamp::${current_date}")

    - name: Cache files for tidy
-      uses: pat-s/always-upload-cache@v2.1.3
+      uses: pat-s/always-upload-cache@v3.0.11
      with:
        path: tidy-cache
        key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
@@ -65,8 +75,8 @@ jobs:

    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3

    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
@@ -110,8 +120,8 @@ jobs:

    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku 
+    - uses: actions/checkout@v3

    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
@@ -146,10 +156,10 @@ jobs:

    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - name: Install pyflakes
@@ -167,10 +177,10 @@ jobs:

    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.8
    - name: run License Check
@@ -198,16 +208,16 @@ jobs:

    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws*  /usr/local/lib/heroku
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.7
    - name: Cache dependencies
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
      with:
        # This path is specific to Ubuntu
        path: ${{ github.workspace }}/cget
@@ -294,16 +304,16 @@ jobs:

    steps:
    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
    - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
      with:
        python-version: 3.7
    - name: Cache dependencies
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
      with:
        # This path is specific to Ubuntu
        path: ${{ github.workspace }}/cget

--- a/.github/workflows/performance.yaml
+++ b/.github/workflows/performance.yaml
@@ -5,14 +5,14 @@ on:
    branches: [develop]
    types: [opened, synchronize, closed]
  schedule:
-    - cron: "0 5 * * 1-6"
+    - cron: "0 6 * * 1-6"

  workflow_dispatch:
    inputs:
      rocm_release:
        description: ROCm Version
        required: true
-        default: '5.2'
+        default: '5.3'
      performance_reports_repo:
        description: Result repository
        required: true
@@ -30,9 +30,9 @@ concurrency: "perftest-${{ github.head_ref ||  github.base_ref || 'schedule' }}"

 jobs:
  release:
-    uses: rocmsoftwareplatform/migraphx-benchmark/.github/workflows/perf-test.yml@main
+    uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/perf-test.yml@main
    with:
-      rocm_release: ${{ github.event.inputs.rocm_release || '5.2' }}
+      rocm_release: ${{ github.event.inputs.rocm_release || '5.3' }}
      result_number: ${{ github.event.inputs.result_number || '10' }}
      flags: ${{ github.event.inputs.flags || '-s' }} 
      performance_reports_repo: ${{ github.event.inputs.performance_reports_repo || 'ROCmSoftwarePlatform/migraphx-reports' }} 

--- a/Dockerfile
+++ b/Dockerfile
@@ -74,7 +74,8 @@ RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cma
 RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF

 # Install newer cmake for onnx runtime
-RUN cget -p /opt/cmake install kitware/cmake@v3.13.4
+ARG CMAKE_VERSION=3.24.2
+RUN cget -p /opt/cmake install -X binary https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz

 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=main

--- a/examples/migraphx/migraphx_driver/README.md
+++ b/examples/migraphx/migraphx_driver/README.md
@@ -29,6 +29,7 @@ See below for a comprehensive list of commands and option arguments, as well as
 | --tf | Load file as a tensorflow graph |
 | --migraphx | Load file as a migraphx graph |
 | --migraphx-json | Load file as a migraphx JSON graph |
+| --batch | Set batch size for the model | 
 | --nhwc | Treat tensorflow format as nhwc | 
 | --nchw | Treat tensorflow format as nchw |
 | --skip-unknown-operators | Skip unknown operators when parsing and continue to parse |

--- a/examples/nlp/python_bert_squad/requirements_bertsquad.txt
+++ b/examples/nlp/python_bert_squad/requirements_bertsquad.txt
@@ -21,6 +21,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 #####################################################################################
-tensorflow==2.7.2
+tensorflow==2.9.3
 onnxruntime
 tokenizers
\ No newline at end of file
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -55,6 +55,7 @@ add_library(migraphx
    insert_pad.cpp
    instruction.cpp
    json.cpp
+    layout_nhwc.cpp
    load_save.cpp
    make_op.cpp
    module.cpp
@@ -144,6 +145,7 @@ register_migraphx_ops(
    if_op
    im2col
    isnan
+    layout
    leaky_relu
    less
    load

--- a/src/api/include/migraphx/migraphx.hpp
+++ b/src/api/include/migraphx/migraphx.hpp
@@ -32,6 +32,7 @@
 #include <memory>
 #include <numeric>
 #include <exception>
+#include <array>
 #include <vector>
 #include <cassert>
 #include <iostream>

--- a/src/auto_contiguous.cpp
+++ b/src/auto_contiguous.cpp
@@ -59,6 +59,8 @@ void auto_contiguous::apply(module& m) const
    auto last = std::prev(m.end());
    for(auto ins : iterator_for(m))
    {
+        if(ins->name() == "layout")
+            continue;
        // for last instruction that is NOT a return
        if(ins->outputs().empty() and ins != last)
            continue;

--- a/src/common.cpp
+++ b/src/common.cpp
@@ -68,13 +68,9 @@ std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,

 std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1)
 {
-    assert(s0.dynamic() or s1.dynamic());
    // change both shapes to dynamic_dimension representation
-    if(not s0.dynamic())
-        s0 = s0.to_dynamic();
-    if(not s1.dynamic())
-        s1 = s1.to_dynamic();
-
+    s0 = s0.to_dynamic();
+    s1 = s1.to_dynamic();
    if(s0.ndim() > s1.ndim())
    {
        std::swap(s0, s1);

--- a/src/eliminate_contiguous.cpp
+++ b/src/eliminate_contiguous.cpp
@@ -42,6 +42,13 @@ static bool try_compute_shape(instruction_ref ins,
    try
    {
        shape new_shape = ins->get_operator().compute_shape(inputs, mods);
+
+        // Cannot tell if a dynamic shape will need to be made contiguous
+        if(new_shape.dynamic())
+        {
+            return false;
+        }
+
        // If the output shape is a standard shape, no need to try its output
        if(new_shape.standard())
        {
@@ -133,14 +140,20 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
        }
    }

-    // Perform evaluations in parallel
+    // Perform static contiguous evaluations in parallel
    std::vector<argument> literals(const_instructions.size());
    par_for(const_instructions.size(), 1, [&](const auto i) {
-        auto c      = op::contiguous{};
-        auto prev   = const_instructions[i]->inputs().front();
-        literals[i] = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
+        auto c    = op::contiguous{};
+        auto prev = const_instructions[i]->inputs().front();
+        // compute the output contiguous shape from the previous instruction shape
+        shape computed_shape                   = c.compute_shape({prev->get_shape()});
+        const std::vector<argument>& prev_eval = {prev->eval()};
+        // prev_eval should not be used in make_compute_output_shape() as computed_shape is static
+        auto co_shape = make_compute_output_shape(pack(c, computed_shape, prev_eval));
+        literals[i]   = c.compute(co_shape, prev_eval);
    });

+    // Replace static contiguous operations with a literal
    for(size_t i = 0; i < const_instructions.size(); i++)
    {
        auto l = m.add_literal(literals[i].get_shape(), literals[i].data());

--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -45,7 +45,16 @@ static literal get_scalar(instruction_ref ins)
        return {};
    auto e = ins->eval();
    literal r{};
-    e.visit_at([&](auto x) { r = literal{x}; });
+    // needed for bool as visit_at invokes as() which promotes bool to int8
+    // Without this we'll break type checks for logical ops that are fused.
+    if(e.get_shape().type() == shape::bool_type)
+    {
+        r = literal{e.at<bool>()};
+    }
+    else
+    {
+        e.visit_at([&](auto x) { r = literal{x}; });
+    }
    return r;
 }

@@ -56,6 +65,8 @@ static void create_pointwise_modules(module_pass_manager& mpm)
    {
        if(not ins->get_operator().attributes().get("pointwise", false))
            continue;
+        if(ins->get_operator().name() == "layout")
+            continue;
        assert(ins->get_operator().attributes().contains("point_op"));
        auto* pm = mpm.create_module(mpm.get_module().name() + ":pointwise" + std::to_string(n++));
        pm->set_bypass();

--- a/src/include/migraphx/check_shapes.hpp
+++ b/src/include/migraphx/check_shapes.hpp
@@ -24,6 +24,7 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CHECK_SHAPES_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CHECK_SHAPES_HPP

+#include <migraphx/permutation.hpp>
 #include <migraphx/shape.hpp>
 #include <migraphx/ranges.hpp>
 #include <migraphx/stringutils.hpp>
@@ -232,6 +233,19 @@ struct check_shapes
        return *this;
    }

+    /*!
+     * Check all shapes are packed with certain layouts
+     */
+    const check_shapes&
+    packed_layouts(const std::initializer_list<std::vector<int64_t>>& layouts) const
+    {
+        if(not this->all_of([&](const shape& s) {
+               return s.packed() and contains(layouts, find_permutation(s));
+           }))
+            MIGRAPHX_THROW(prefix() + "Shapes are not packed with correct layout");
+        return *this;
+    }
+
    /*!
     * Check all shapes are packed or broadcasted.
     */

--- a/src/include/migraphx/layout_nhwc.hpp
+++ b/src/include/migraphx/layout_nhwc.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
+#define MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
+
+#include <string>
+#include <migraphx/instruction_ref.hpp>
+#include <migraphx/config.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+struct module_pass_manager;
+
+/**
+ * Transform convolutions to nhwc
+ */
+struct layout_nhwc
+{
+    std::string name() const { return "layout_nhwc"; }
+    void apply(module_pass_manager& mpm) const;
+};
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_MIGRAPHX_LAYOUT_NHWC_HPP
--- a/src/include/migraphx/op/broadcast.hpp
+++ b/src/include/migraphx/op/broadcast.hpp
@@ -128,8 +128,8 @@ struct broadcast
            {
                MIGRAPHX_THROW("BROADCAST_2in: s0 length doesn't match with static s1 axis "
                               "dimension length (" +
-                               migraphx::to_string(s0.dyn_dims()[0]) +
-                               " != " + migraphx::to_string(s1.dyn_dims()[axis]) + ")");
+                               migraphx::to_string(s0.lens()[0]) +
+                               " != " + migraphx::to_string(s1.lens()[axis]) + ")");
            }
            std::vector<size_t> bcast_strides(s1.ndim(), 0);
            std::copy(s0.strides().begin(), s0.strides().end(), bcast_strides.begin() + axis);

--- a/src/include/migraphx/op/contiguous.hpp
+++ b/src/include/migraphx/op/contiguous.hpp
@@ -28,6 +28,7 @@
 #include <migraphx/argument.hpp>
 #include <migraphx/shape_for_each.hpp>
 #include <migraphx/config.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -42,19 +43,27 @@ namespace op {
 struct contiguous
 {
    std::string name() const { return "contiguous"; }
+
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(1);
-        if(inputs.front().standard())
-            return inputs.front();
-        auto lens = inputs.at(0).lens();
-        auto t    = inputs.at(0).type();
-        return {t, lens};
+        check_shapes{inputs, *this, true}.has(1);
+        auto s0 = inputs.front();
+        if(s0.dynamic() or s0.standard())
+        {
+            return s0;
+        }
+        else
+        {
+            const auto& lens = s0.lens();
+            auto t           = s0.type();
+            return {t, lens};
+        }
    }
-    argument compute(const shape& output_shape, std::vector<argument> args) const
+
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        assert(output_shape.standard());
-        argument result{output_shape};
+        assert(dyn_out.computed_shape.standard());
+        argument result{dyn_out.computed_shape};
        visit_all(result, args[0])([&](auto output, auto input) {
            shape_for_each(output.get_shape(), [&](const auto& idx) {
                output(idx.begin(), idx.end()) = input(idx.begin(), idx.end());

--- a/src/include/migraphx/op/layout.hpp
+++ b/src/include/migraphx/op/layout.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_OP_LAYOUT_HPP
+#define MIGRAPHX_GUARD_OP_LAYOUT_HPP
+
+#include <migraphx/config.hpp>
+#include <array>
+#include <migraphx/check_shapes.hpp>
+#include <migraphx/stringutils.hpp>
+#include <migraphx/streamutils.hpp>
+#include <migraphx/literal.hpp>
+#include <migraphx/op/unary.hpp>
+#include <cmath>
+#include <utility>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+namespace op {
+
+struct layout : unary<layout>
+{
+    std::vector<int64_t> permutation;
+
+    template <class Self, class F>
+    static auto reflect(Self& self, F f)
+    {
+        return pack(f(self.permutation, "permutation"));
+    }
+
+    shape compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this}.has(1).only_dims(permutation.size());
+        auto lens = inputs.at(0).lens();
+        auto t    = inputs.at(0).type();
+        return shape::from_permutation(t, lens, permutation);
+    }
+
+    auto apply() const
+    {
+        return [](auto x) { return x; };
+    }
+};
+
+} // namespace op
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_OP_LAYOUT_HPP
--- a/src/include/migraphx/op/transpose.hpp
+++ b/src/include/migraphx/op/transpose.hpp
@@ -29,6 +29,7 @@
 #include <migraphx/config.hpp>
 #include <migraphx/value.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/dyn_output.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -45,17 +46,15 @@ struct transpose
    }

    std::string name() const { return "transpose"; }
+
    shape compute_shape(std::vector<shape> inputs) const
    {
-        check_shapes{inputs, *this}.has(1);
-        auto input         = inputs.at(0);
-        auto input_lens    = input.lens();
-        auto input_strides = input.strides();
-        auto t             = input.type();
+        check_shapes{inputs, *this, true}.has(1);
+        auto input = inputs.at(0);

-        if(dims.size() != input_lens.size())
+        if(dims.size() != input.ndim())
        {
-            MIGRAPHX_THROW("Permutation has wrong number of axes");
+            MIGRAPHX_THROW("TRANSPOSE: Permutation has wrong number of axes");
        }
        std::vector<int64_t> axes(dims.size());
        std::iota(axes.begin(), axes.end(), 0);
@@ -63,19 +62,36 @@ struct transpose
        {
            MIGRAPHX_THROW("TRANSPOSE: Invalid permutation");
        }
-        std::vector<size_t> output_lens(input_lens.size());
-        std::vector<size_t> output_strides(input_lens.size());
-        for(std::size_t i = 0; i < output_lens.size(); i++)
+
+        if(input.dynamic())
        {
-            output_lens[i]    = input_lens[dims[i]];
-            output_strides[i] = input_strides[dims[i]];
+            std::vector<shape::dynamic_dimension> output_dyn_dims(input.ndim());
+            std::transform(dims.cbegin(), dims.cend(), output_dyn_dims.begin(), [&](auto dim) {
+                return input.dyn_dims()[dim];
+            });
+            return {input.type(), output_dyn_dims};
+        }
+        else
+        {
+            auto input_lens    = input.lens();
+            auto input_strides = input.strides();
+
+            std::vector<size_t> output_lens(input.ndim());
+            std::vector<size_t> output_strides(input.ndim());
+            for(std::size_t i = 0; i < input.ndim(); i++)
+            {
+                output_lens[i]    = input_lens[dims[i]];
+                output_strides[i] = input_strides[dims[i]];
+            }
+            return {input.type(), output_lens, output_strides};
        }
-        return {t, output_lens, output_strides};
    }
-    argument compute(shape output_shape, std::vector<argument> args) const
+
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        return args[0].reshape(output_shape);
+        return args[0].reshape(dyn_out.computed_shape);
    }
+
    std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };


--- a/src/instruction.cpp
+++ b/src/instruction.cpp
--- a/src/layout_nhwc.cpp
+++ b/src/layout_nhwc.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/layout_nhwc.hpp>
+#include <migraphx/module.hpp>
+#include <migraphx/instruction.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/permutation.hpp>
+#include <migraphx/functional.hpp>
+#include <migraphx/ranges.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/eliminate_contiguous.hpp>
+#include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/pass_manager.hpp>
+
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+
+template <class Predicate>
+std::vector<instruction_ref> find_lasts(const module& m, Predicate pred)
+{
+    std::vector<instruction_ref> result;
+    fix([&](auto self, auto ins) {
+        if(pred(ins))
+        {
+            result.push_back(ins);
+            return;
+        }
+        for(auto input : ins->inputs())
+            self(input);
+    })(std::prev(m.end()));
+    return result;
+}
+
+std::unordered_set<instruction_ref> preserve_output_layout(module& m)
+{
+    std::unordered_set<instruction_ref> result;
+    std::vector<instruction_ref> outputs = find_lasts(m, [](auto ins) {
+        return ins->name() == "convolution" and ins->get_shape().lens().size() == 4;
+    });
+    for(auto output : outputs)
+    {
+        auto permutation = find_permutation(output->get_shape());
+        auto layout      = m.insert_instruction(
+            std::next(output), make_op("layout", {{"permutation", permutation}}), output);
+        result.insert(m.replace_instruction(output, layout));
+    }
+    return result;
+}
+
+void transform_convolutions(module& m)
+{
+    for(auto ins : iterator_for(m))
+    {
+        if(ins->name() != "convolution")
+            continue;
+        if(ins->get_shape().lens().size() != 4)
+            continue;
+        auto v = ins->get_operator().to_value();
+        if(v.at("group").to<int>() > 1)
+            continue;
+        auto args = ins->inputs();
+        std::transform(args.begin(), args.end(), args.begin(), [&](const auto& i) {
+            return m.insert_instruction(ins, make_op("layout", {{"permutation", {0, 2, 3, 1}}}), i);
+        });
+        auto conv = m.insert_instruction(ins, ins->get_operator(), args);
+        auto c    = m.insert_instruction(ins, make_op("contiguous"), conv);
+        m.replace_instruction(ins, c);
+    }
+}
+
+void remove_layout(module& m, const std::unordered_set<instruction_ref>& output_layouts)
+{
+    for(auto ins : iterator_for(m))
+    {
+        if(ins->name() != "layout")
+            continue;
+        if(ins->get_shape() != ins->inputs().front()->get_shape())
+            continue;
+        if(contains(output_layouts, ins))
+            continue;
+        m.replace_instruction(ins, ins->inputs().front());
+    }
+}
+
+void layout_nhwc::apply(module_pass_manager& mpm) const
+{
+    std::unordered_set<instruction_ref> output_layouts = preserve_output_layout(mpm.get_module());
+    transform_convolutions(mpm.get_module());
+    mpm.run_pass(dead_code_elimination{});
+    mpm.run_pass(eliminate_contiguous{"contiguous"});
+    mpm.run_pass(dead_code_elimination{});
+    remove_layout(mpm.get_module(), output_layouts);
+    mpm.run_pass(dead_code_elimination{});
+}
+
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/onnx/parse_binary_op.cpp
+++ b/src/onnx/parse_binary_op.cpp
@@ -57,6 +57,12 @@ struct parse_binary_op : op_parser<parse_binary_op>
                parser.parse_value(info.attributes.at("broadcast")).at<uint64_t>();
            if(broadcasted != 0)
            {
+                if(std::any_of(
+                       args.cbegin(), args.cend(), [](auto a) { return a->get_shape().dynamic(); }))
+                {
+                    MIGRAPHX_THROW(
+                        "Binary op broadcast attribute not supported for dynamic input shapes");
+                }
                uint64_t axis = parser.parse_value(info.attributes.at("axis")).at<uint64_t>();
                auto l        = info.add_instruction(
                    make_op("broadcast",