Merge branch 'develop' into enable_navi_32_ci

0c98c38e · Ted Themistokleous · GitHub · 1612d8f3 · 64b306ab · 0c98c38e
Unverified Commit 0c98c38e authored Sep 12, 2023 by Ted Themistokleous Committed by GitHub Sep 12, 2023
20 changed files
--- a/.github/workflows/sync-onnxrt-main.yaml
+++ b/.github/workflows/sync-onnxrt-main.yaml
@@ -47,6 +47,7 @@ jobs:
            onnxruntime
            dependancies
            automated 
+            skip bot checks
          assignees: TedThemistokleous
          reviewers: TedThemistokleous causten
          draft: false

--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,8 +2,49 @@
 Full documentation for MIGraphX is available at [MIGraphX Documentation](https://rocmdocs.amd.com/projects/AMDMIGraphX/en/latest/).
-## MIGraphX 2.5 for ROCm 5.5.0
+## MIGraphX 2.7 for ROCm 5.7.0
+### Added
+- Enabled hipRTC to not require dev packages for migraphx runtime and allow the ROCm install to be in a different directory than it was during build time
+- Add support for multi-target execution
+- Added Dynamic Batch support with C++/Python APIs
+- Add migraphx.create_argument to python API
+- Added dockerfile example for Ubuntu 22.04
+- Add TensorFlow supported ops in driver similar to existing onnx operator list
+- Add a MIGRAPHX_TRACE_MATCHES_FOR env variable to filter the matcher trace
+- Improved debugging by printing max, min, mean and stddev values for TRACE_EVAL = 2
+- Use fast_math flag instead of ENV flag for GELU
+- Print message from driver if offload copy is set for compiled program
+### Optimizations
+- Optimized for ONNX Runtime 1.14.0
+- Improved compile times by only building for the GPU on the system
+- Improve performance of pointwise/reduction kernels when using NHWC layouts
+- Load specific version of the migraphx_py library
+- Annotate functions with the block size so the compiler can do a better job of optimizing 
+- Enable reshape on nonstandard shapes
+- Use half HIP APIs to compute max and min
+- Added support for broadcasted scalars to unsqueeze operator
+- Improved multiplies with dot operator
+- Handle broadcasts across dot and concat
+- Add verify namespace for better symbol resolution
+### Fixed
+- Resolved accuracy issues with FP16 resnet50
+- Update cpp generator to handle inf from float
+- Fix assertion error during verify and make DCE work with tuples
+- Fix convert operation for NaNs
+- Fix shape typo in API test
+- Fix compile warnings for shadowing variable names
+- Add missing specialization for the `nullptr` for the hash function
+### Changed
+- Bumped version of half library to 5.6.0
+- Bumped CI to support rocm 5.6
+- Make building tests optional
+- Replace np.bool with bool as per numpy request
+### Removed
+- Removed int8x4 rocBlas calls due to deprecation
+- Removed std::reduce usage since not all OSes support it
+## MIGraphX 2.5 for ROCm 5.5.0
 ### Added
 - Y-Model feature to store tuning information with the optimized model
 - Added Python 3.10 bindings 
@@ -12,15 +53,11 @@ Full documentation for MIGraphX is available at [MIGraphX Documentation](https:/
 - Build support for ROCm MLIR
 - Added migraphx-driver flag to print optimizations in python (--python)
 - Added JIT implementation of the Gather and Pad operator which results in better handling of larger tensor sizes.
 ### Optimizations
 - Improved performance of Transformer based models
 - Improved performance of the Pad, Concat, Gather, and Pointwise operators
 - Improved onnx/pb file loading speed
 - Added general optimize pass which runs several passes such as simplify_reshapes/algebra and DCE in loop.
 ### Fixed
 - Improved parsing Tensorflow Protobuf files 
 - Resolved various accuracy issues with some onnx models
@@ -29,6 +66,5 @@ Full documentation for MIGraphX is available at [MIGraphX Documentation](https:/
 - Use --offload-arch instead of --cuda-gpu-arch for the HIP compiler
 - Changes inside JIT to use float accumulator for large reduce ops of half type to avoid overflow.
 - Changes inside JIT to temporarily use cosine to compute sine function.
 ### Changed
 - Changed version/location of 3rd party build dependencies to pick up fixes
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -116,10 +116,10 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release")
        stash includes: 'build/*.deb', name: 'migraphx-package'
    }
-}, hidden_symbols: rocmnode('cdna') { cmake_build ->
+// }, hidden_symbols: rocmnode('cdna') { cmake_build ->
-    stage('Hidden symbols') {
+//     stage('Hidden symbols') {
-        cmake_build(flags: "-DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_VISIBILITY_PRESET=hidden -DCMAKE_C_VISIBILITY_PRESET=hidden")
+//         cmake_build(flags: "-DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_VISIBILITY_PRESET=hidden -DCMAKE_C_VISIBILITY_PRESET=hidden")
-    }
+//     }
 }, all_targets_debug : rocmnode('cdna') { cmake_build ->
    stage('All targets Release') {
        cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On") 

--- a/docs/.sphinx/requirements.in
+++ b/docs/.sphinx/requirements.in
-rocm-docs-core==0.11.0
+rocm-docs-core>=0.20.0
--- a/docs/.sphinx/requirements.txt
+++ b/docs/.sphinx/requirements.txt
@@ -21,7 +21,7 @@ charset-normalizer==3.1.0
    # via requests
 click==8.1.3
    # via sphinx-external-toc
-cryptography==40.0.2
+cryptography==41.0.3
    # via pyjwt
 deprecated==1.2.13
    # via pygithub
@@ -45,8 +45,6 @@ jinja2==3.1.2
    # via
    #   myst-parser
    #   sphinx
-linkify-it-py==1.0.3
-    # via myst-parser
 markdown-it-py==2.2.0
    # via
    #   mdit-py-plugins
@@ -57,7 +55,7 @@ mdit-py-plugins==0.3.5
    # via myst-parser
 mdurl==0.1.2
    # via markdown-it-py
-myst-parser[linkify]==1.0.0
+myst-parser==1.0.0
    # via rocm-docs-core
 packaging==23.1
    # via
@@ -89,7 +87,7 @@ requests==2.28.2
    # via
    #   pygithub
    #   sphinx
-rocm-docs-core==0.11.0
+rocm-docs-core>=0.20.0
    # via -r requirements.in
 smmap==5.0.0
    # via gitdb
@@ -132,8 +130,6 @@ sphinxcontrib-serializinghtml==1.1.5
    # via sphinx
 typing-extensions==4.5.0
    # via pydata-sphinx-theme
-uc-micro-py==1.0.1
-    # via linkify-it-py
 urllib3==1.26.15
    # via requests
 wrapt==1.15.0

--- a/mlir-requirements.txt
+++ b/mlir-requirements.txt
@@ -21,4 +21,4 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 #####################################################################################
-ROCmSoftwarePlatform/rocMLIR@ea15b3597ce55b9088621818228595dd48fb6ec0 -DBUILD_FAT_LIBROCKCOMPILER=On
+ROCmSoftwarePlatform/rocMLIR@3657f509bfed86bb79d5c6e24aa237e48f09f9f3 -DBUILD_FAT_LIBROCKCOMPILER=On
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -36,6 +36,7 @@ add_library(migraphx
    argument.cpp
    auto_contiguous.cpp
    common.cpp
+    common_dims.cpp
    compile_src.cpp
    convert_to_json.cpp
    cpp_generator.cpp

--- a/src/common_dims.cpp
+++ b/src/common_dims.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/common_dims.hpp>
+#include <migraphx/ranges.hpp>
+#include <algorithm>
+#include <cassert>
+#include <numeric>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+template <class Iterator>
+static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim) // Returns the end iterator of the leading run of [start, last) to pair with `dim`.
+{
+    std::size_t x = 1; // running product of the elements visited so far
+    auto it       = std::find_if(start, last, [&](auto i) {
+        x *= i;
+        return x > dim; // stop at the first element that pushes the product past dim
+    });
+    if(x < dim) // nothing exceeded dim, so the whole range was scanned and its product is still short
+        return start; // report an empty run
+    return it; // either `last` (product == dim) or the first element that overshoots
+}
+template <class Range>
+static auto elements(const Range& r) // Product of every value in r as std::size_t; yields 1 for an empty range.
+{
+    return std::accumulate(r.begin(), r.end(), std::size_t{1}, std::multiplies<>{});
+}
+struct common_dim_state // Cursor over one dims vector together with the axes_map being filled in for it.
+{
+    common_dim_state(const std::vector<std::size_t>& pdims,
+                     std::vector<std::vector<std::size_t>>& paxes_map)
+        : dims(&pdims), axes_map(&paxes_map), it(dims->begin()) // keeps pointers to the caller's vectors (no copies); cursor starts at the first dim
+    {
+    }
+    const std::vector<std::size_t>* dims            = nullptr; // dims being walked
+    std::vector<std::vector<std::size_t>>* axes_map = nullptr; // output: one entry appended per add_axes call, one per axis for add_multi_axes
+    std::vector<std::size_t>::const_iterator it{}; // current position in *dims
+    std::size_t rem = 1; // divisor applied to *it by get(); compute_common_dim sets it to the leftover factor, 1 means none pending
+    std::size_t get() const { return *it / rem; } // current dim divided by rem
+    bool is_end() const { return it == dims->end(); } // true once the cursor has passed the last dim
+    void next(std::size_t i = 1) { it += i; } // advance the cursor by i dims
+    auto dims_for(std::size_t d) const // Sub-range starting at the cursor that compute_end_dim selects for target d.
+    {
+        auto dim_end = compute_end_dim(it, dims->end(), d); // NOTE(review): scans the raw dim values; rem is not applied here - confirm intended
+        return range(it, dim_end);
+    }
+    void add_axes(std::size_t naxes, std::size_t start) MIGRAPHX_TIDY_CONST // Appends ONE axes_map entry holding all computed axes.
+    {
+        auto axes = compute_axes(naxes, start);
+        axes_map->push_back(std::move(axes));
+    }
+    void add_multi_axes(std::size_t naxes, std::size_t start) MIGRAPHX_TIDY_CONST // Appends one single-axis entry per computed axis.
+    {
+        auto axes = compute_axes(naxes, start);
+        std::transform(axes.begin(),
+                       axes.end(),
+                       std::back_inserter(*axes_map),
+                       [&](auto axis) -> std::vector<std::size_t> { return {axis}; });
+    }
+    std::vector<std::size_t> compute_axes(std::size_t naxes, std::size_t start) const // Consecutive axis indices beginning at start (naxes of them, or one more when rem != 1).
+    {
+        if(rem != 1) // a remainder is pending: widen the run by one axis on the low side
+        {
+            assert(start > 0); // start is decremented below, so it must not already be 0
+            naxes++;
+            start--;
+        }
+        std::vector<std::size_t> axes(naxes);
+        std::iota(axes.begin(), axes.end(), start); // start, start+1, ...
+        return axes;
+    }
+};
+static bool compute_common_dim(std::vector<std::size_t>& cd_dims, // Appends to cd_dims the dims covering state2's current dim; returns false when no split is found.
+                               common_dim_state& state1,
+                               common_dim_state& state2)
+{
+    assert(state1.get() <= state2.get()); // caller passes the side with the smaller current dim as state1
+    auto d2    = state2.get();
+    auto dims  = state1.dims_for(d2); // run of state1 dims selected for d2
+    auto n     = elements(dims); // product of that run
+    auto naxes = distance(dims); // number of dims in the run
+    if(naxes == 0) // dims_for produced an empty run
+        return false;
+    // If not divisible then we can't compute a common dim
+    if((d2 % n) != 0)
+        return false;
+    auto rem = d2 / n; // factor of d2 not covered by the run
+    state1.add_multi_axes(naxes, cd_dims.size()); // each state1 dim in the run gets its own axes_map entry
+    state2.add_axes(rem == 1 ? naxes : naxes + 1, cd_dims.size()); // d2 gets one entry spanning the run, plus one axis for rem if present
+    state1.rem = rem; // becomes the divisor in state1's next get()
+    state2.rem = 1; // state2's current dim is fully consumed
+    cd_dims.insert(cd_dims.end(), dims.begin(), dims.end());
+    if(state1.rem != 1)
+        cd_dims.push_back(state1.rem); // the leftover factor becomes its own common dim
+    state1.next(distance(dims)); // skip past the whole run
+    state2.next();
+    return true;
+}
+common_dims common_dims::compute(const std::vector<std::size_t>& dims1, // Builds the common dims and both axes maps; returns a default-constructed common_dims on failure.
+                                 const std::vector<std::size_t>& dims2)
+{
+    assert(elements(dims1) > 0); // precondition: non-zero element count
+    assert(elements(dims1) == elements(dims2)); // precondition: both sides describe the same number of elements
+    common_dims cd;
+    common_dim_state state1{dims1, cd.axes_map1}; // each state appends directly into cd's axes maps
+    common_dim_state state2{dims2, cd.axes_map2};
+    while(not state1.is_end() and not state2.is_end())
+    {
+        auto d1 = state1.get();
+        auto d2 = state2.get();
+        if(d1 <= d2) // the side with the smaller current dim always goes first
+        {
+            if(not compute_common_dim(cd.dims, state1, state2))
+                return {}; // empty result: callers in this file test dims.empty()
+        }
+        else // if(d1 > d2)
+        {
+            if(not compute_common_dim(cd.dims, state2, state1))
+                return {};
+        }
+    } // NOTE(review): the loop stops as soon as either side ends; dims left on the other side get no axes_map entry - confirm callers do not rely on them
+    assert(elements(dims1) == elements(cd.dims)); // the common dims must describe the same element count
+    return cd;
+}
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/eliminate_contiguous.cpp
+++ b/src/eliminate_contiguous.cpp
@@ -35,6 +35,8 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS)
 static bool try_compute_shape(instruction_ref ins,
                              const std::vector<shape>& inputs,
                              const std::vector<module_ref>& mods)
@@ -78,14 +80,26 @@ static bool try_compute_shape(instruction_ref ins,
                return (arg == ins) ? new_shape : arg->get_shape();
            });
-            if(not try_compute_shape(output, input_shapes, mods))
+            if(not try_compute_shape(output, input_shapes, output->module_inputs()))
            {
                return false;
            }
        }
    }
+    catch(const std::exception& e)
+    {
+        if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+        {
+            std::cout << "Exception: " << e.what() << std::endl;
+        }
+        return false;
+    }
    catch(...)
    {
+        if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+        {
+            std::cout << "Unknown exception" << std::endl;
+        }
        return false;
    }
@@ -127,6 +141,11 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
        {
            if(arg->name() != op_name)
                continue;
+            if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+            {
+                std::cout << "eliminate_contiguous: ";
+                m.debug_print(ins);
+            }
            auto prev = arg->inputs().front();
            replace(new_args, arg, prev);
            if(try_compute_shape(ins, new_args, mod_args))

--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -24,11 +24,14 @@
 #include <migraphx/fuse_pointwise.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/ranges.hpp>
+#include <migraphx/matcher.hpp>
+#include <migraphx/common_dims.hpp>
 #include <iterator>
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)
@@ -189,6 +192,54 @@ static bool find_pointwise_modules(module& m)
    }
    return changed;
 }
+namespace {
+struct find_pointwise_reshape_pointwise // Rewrites pointwise -> reshape-like op -> pointwise so both pointwise ops operate on one shared set of dims.
+{
+    auto matcher() const // Matches a "pointwise" with any input that is a used-once reshape-like op fed by a used-once "pointwise".
+    {
+        auto reshape =
+            match::name("reshape", "squeeze", "unsqueeze", "flatten")(match::used_once());
+        auto skip_contiguous = [](auto... ms) { // applies ms to argument 0, skipping over used-once "contiguous" instructions
+            return match::arg(0)(match::skip(match::name("contiguous")(match::used_once()))(ms...));
+        };
+        auto pointwise         = match::name("pointwise")(match::used_once());
+        auto reshape_pointwise = reshape(skip_contiguous(pointwise.bind("x"))).bind("reshape"); // binds the inner pointwise as "x" and the reshape-like op as "reshape"
+        return match::name("pointwise")(match::any_of[match::inputs()](reshape_pointwise));
+    }
+    void apply(module& m, const match::matcher_result& r) const // Re-inserts both pointwise ops on the common dims, then reshapes back to the original lens.
+    {
+        auto ins         = r.result; // outer pointwise
+        auto x_ins       = r.instructions["x"]; // inner pointwise
+        auto reshape_ins = r.instructions["reshape"]; // reshape-like op between them
+        auto cd = common_dims::compute(ins->get_shape().lens(), x_ins->get_shape().lens());
+        if(cd.dims.empty()) // no common dims available: leave the module unchanged
+            return;
+        auto reshape_input = [&](const auto& ins_to_insert) { // returns a functor inserting contiguous + reshape(cd.dims) before ins_to_insert
+            return [&](auto input) {
+                auto c = m.insert_instruction(ins_to_insert, make_op("contiguous"), input);
+                return m.insert_instruction(
+                    ins_to_insert, make_op("reshape", {{"dims", cd.dims}}), c);
+            };
+        };
+        auto x_inputs = x_ins->inputs();
+        std::transform(x_inputs.begin(), x_inputs.end(), x_inputs.begin(), reshape_input(x_ins)); // reshape every input of the inner pointwise
+        auto new_x_ins =
+            m.insert_instruction(x_ins, x_ins->get_operator(), x_inputs, x_ins->module_inputs()); // inner pointwise re-inserted on the reshaped inputs
+        auto inputs = ins->inputs();
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input == reshape_ins) // the reshaped branch is replaced by the new inner pointwise
+                return new_x_ins;
+            return reshape_input(ins)(input); // every other input is reshaped to the common dims
+        });
+        auto pw = m.insert_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
+        m.replace_instruction(ins, make_op("reshape", {{"dims", ins->get_shape().lens()}}), pw); // reshape back to the original lens of ins
+    }
+};
+} // namespace
 void fuse_pointwise::apply(module_pass_manager& mpm) const
 {
@@ -200,6 +251,8 @@ void fuse_pointwise::apply(module_pass_manager& mpm) const
    }
    for(int i = 0; i < 8; i++)
    {
+        match::find_matches(mpm.get_module(), find_pointwise_reshape_pointwise{});
+        mpm.run_pass(simplify_reshapes{1});
        if(not find_pointwise_modules(mpm.get_module()))
            break;
        mpm.run_pass(dead_code_elimination{});

--- a/src/include/migraphx/check_shapes.hpp
+++ b/src/include/migraphx/check_shapes.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -70,13 +70,19 @@ struct check_shapes
        check_dynamic();
    }
-    template <class Op>
+    template <class Op, MIGRAPHX_REQUIRES(not std::is_convertible<Op, std::string>{})>
    check_shapes(const std::vector<shape>& s, const Op& op, const bool d = false)
        : begin(s.begin()), end(s.end()), name(op.name()), dynamic_allowed(d)
    {
        check_dynamic();
    }
+    check_shapes(const std::vector<shape>& s, const std::string& n, const bool d = false) // Overload that takes the name as a string; d is stored as dynamic_allowed.
+        : begin(s.begin()), end(s.end()), name(n), dynamic_allowed(d)
+    {
+        check_dynamic(); // same post-construction check as the other constructors shown here
+    }
    void check_dynamic() const
    {
        if(not dynamic_allowed and this->any_of([&](const shape& s) { return s.dynamic(); }))
@@ -228,6 +234,16 @@ struct check_shapes
        return *this;
    }
+    /*!
+     * Check all shapes have the same layout.
+     */
+    const check_shapes& same_layout() const // Returns *this so further checks can be chained.
+    {
+        if(not this->same([](const shape& s) { return find_permutation(s); })) // compares find_permutation(s) across the shapes
+            MIGRAPHX_THROW(prefix() + "Layouts do not match");
+        return *this;
+    }
    /*!
     * Check all shapes are standard.
     */

--- a/src/include/migraphx/common_dims.hpp
+++ b/src/include/migraphx/common_dims.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_MIGRAPHX_COMMON_DIMS_HPP
+#define MIGRAPHX_GUARD_MIGRAPHX_COMMON_DIMS_HPP
+#include <migraphx/config.hpp>
+#include <cstdint>
+#include <vector>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+/// This will compute a higher dimensional space that will preserve the axes
+/// for both sets of dimensions. Two axes_maps are provided for each of the
+/// dims that will map the axis to the axes that are used by the result of
+/// common_dims.
+struct MIGRAPHX_EXPORT common_dims
+{
+    static common_dims compute(const std::vector<std::size_t>& dims1, // Result has empty `dims` when no common dims could be computed.
+                               const std::vector<std::size_t>& dims2);
+    std::vector<std::size_t> dims; // the common dimensions
+    std::vector<std::vector<std::size_t>> axes_map1; // per axis of dims1: the axes of `dims` it maps to
+    std::vector<std::vector<std::size_t>> axes_map2; // per axis of dims2: the axes of `dims` it maps to
+};
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_MIGRAPHX_COMMON_DIMS_HPP
--- a/src/include/migraphx/convolution.hpp
+++ b/src/include/migraphx/convolution.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -62,7 +62,7 @@ void convolution(Output output, T input, T weights, Padding padding, Stride stri
        shape win_shape{output_shape.type(), win_size};
        double acc = 0.0;
-        shape_for_each(win_shape, [&](auto idx_win) {
+        shape_for_each(win_shape, [&](const auto& idx_win) {
            auto k           = idx_win[0];
            const auto in_ch = group_id * wei_c + k;
            std::vector<std::ptrdiff_t> idx(idx_o.begin(), idx_o.end());

--- a/src/include/migraphx/matcher.hpp
+++ b/src/include/migraphx/matcher.hpp
@@ -381,22 +381,24 @@ void find_matches_for(source_location location, Mod& mod, instruction_ref ins, M
    const int trace         = value_of(MIGRAPHX_TRACE_MATCHES{});
    const bool validate     = enabled(MIGRAPHX_VALIDATE_MATCHES{});
    const auto trace_filter = string_value_of(MIGRAPHX_TRACE_MATCHES_FOR{});
-    const bool trace_for    = not trace_filter.empty() and
+    bool match              = false;
-                           (contains(std::string{location.file_name()}, trace_filter) or
-                            contains(std::string{location.function_name()}, trace_filter));
-    bool match = false;
    each_args(
        [&](auto&& m) {
+            const auto& matcher_name = get_type_name(m);
+            const bool trace_for     = not trace_filter.empty() and
+                                   (contains(std::string{location.file_name()}, trace_filter) or
+                                    contains(std::string{location.function_name()}, trace_filter) or
+                                    contains(matcher_name, trace_filter));
            if(match)
                return;
-            if(trace > 1 or trace_for)
+            if(trace > 1 and trace_for)
-                std::cout << "Match: " << get_type_name(m) << std::endl;
+                std::cout << "Match: " << matcher_name << std::endl;
            auto r = match_instruction(get_module(mod), ins, m.matcher());
            if(r.result == get_module(mod).end())
                return;
            if(trace > 0 or trace_for)
            {
-                std::cout << "Matched by " << get_type_name(m) << std::endl;
+                std::cout << "Matched by " << matcher_name << std::endl;
                get_module(mod).debug_print(ins);
            }
            // If its already invalid dont validate it again
@@ -407,7 +409,7 @@ void find_matches_for(source_location location, Mod& mod, instruction_ref ins, M
                auto invalid = get_module(mod).validate();
                if(invalid != get_module(mod).end())
                {
-                    std::cout << "Invalid program from match: " << get_type_name(m) << std::endl;
+                    std::cout << "Invalid program from match: " << matcher_name << std::endl;
                    std::cout << "Invalid instructions: " << std::endl;
                    get_module(mod).debug_print(invalid->inputs());
                    get_module(mod).debug_print(invalid);
@@ -621,6 +623,8 @@ MIGRAPHX_PRED_MATCHER(broadcast, instruction_ref ins)
 template <class... Ms>
 auto skip(Ms... ms)
 {
+    static_assert(((not std::is_convertible<Ms, std::string>{}) and ...),
+                  "Use a matcher not a string for skip.");
    auto m = any_of(ms...);
    return make_basic_fun_matcher([=](matcher_context& ctx, instruction_ref start) {
        return fix<optional<instruction_ref>>(

--- a/src/include/migraphx/normalize_attributes.hpp
+++ b/src/include/migraphx/normalize_attributes.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -28,6 +28,7 @@
 #include <migraphx/shape.hpp>
 #include <cstring>
 #include <vector>
+#include <migraphx/op/normalize_attribute.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -42,6 +43,36 @@ struct select_dependent_type
 template <class T, class... Ts>
 using dependent_type = typename select_dependent_type<T, Ts...>::type;
+/**
+ * Used to normalize variable input axes at model runtime.
+ * Example: the axes inputs of the slice operator.
+ *
+ * \param axes the axes to normalize
+ * \param input_shape shape of the input tensor
+ * \param attr_val the normalize_axes attributes from the operator
+ * \param prefix error message prefix
+ */
+std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
+                                    const shape& input_shape,
+                                    const value& attr_val,
+                                    const std::string& prefix = "");
+/**
+ * Used to normalize variable input axes at model runtime.
+ * Example: the starts and ends inputs of the slice operator.
+ *
+ * \param indices the indices to normalize
+ * \param axes which axes the indices apply over
+ * \param input_shape shape of the input tensor
+ * \param attr_val the normalize_axes attributes from the operator
+ * \param prefix error message prefix
+ */
+std::vector<int64_t> normalize_indices(const std::vector<int64_t>& indices,
+                                       const std::vector<int64_t>& axes,
+                                       const shape& input_shape,
+                                       const value& attr_val,
+                                       const std::string& prefix = "");
 MIGRAPHX_EXPORT
 bool normalize_attributes(operation& op, const shape& input_shape);

--- a/src/include/migraphx/op/allocate.hpp
+++ b/src/include/migraphx/op/allocate.hpp
@@ -36,20 +36,48 @@ namespace op {
 struct allocate
 {
    shape s{};
+    // for dynamic allocate to set the buffer type
+    shape::type_t buf_type = shape::half_type;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        return pack(f(self.s, "shape"));
+        return pack(f(self.s, "shape"), f(self.buf_type, "buf_type"));
    }
    std::string name() const { return "allocate"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
-        migraphx::check_shapes{inputs, *this, true}.has(0);
+        migraphx::check_shapes{inputs, *this, true}.has(0, 1);
-        return s;
+        // check if shape attribute is not default
+        if(s != shape())
+        {
+            return s;
+        }
+        else
+        {
+            const auto& out_dims = inputs.at(0);
+            assert(not out_dims.dynamic());
+            assert(out_dims.ndim() == 1);
+            std::size_t max_val = std::numeric_limits<std::size_t>::max();
+            std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
+                                                           shape::dynamic_dimension{0, max_val});
+            return {buf_type, dyn_dims};
+        }
    }
-    argument compute(const shape& output_shape, const std::vector<argument>&) const
+    argument compute(const shape& output_shape, const std::vector<argument>& args) const
    {
-        return {output_shape};
+        if(args.empty())
+        {
+            return {output_shape};
+        }
+        else
+        {
+            std::vector<std::size_t> output_dims(output_shape.ndim());
+            args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
+            return {shape{buf_type, output_dims}};
+        }
    }
 };

--- a/src/include/migraphx/op/common.hpp
+++ b/src/include/migraphx/op/common.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -33,8 +33,12 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+// Specifies where to add the "extra" cell of padding if the
+// calculated padding is an odd number.
 // Padding mode is default_ for fixed shape padding.
-// same_lower and same_upper used for dynamic padding.
+// same_lower and same_upper specify dynamic padding.
+// The odd cell goes at the beginning of the dimension
+// (same_lower) or end (same_upper).
 enum padding_mode_t
 {
    default_, // NOLINT

--- a/src/include/migraphx/op/contiguous.hpp
+++ b/src/include/migraphx/op/contiguous.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

--- a/src/include/migraphx/op/convolution.hpp
+++ b/src/include/migraphx/op/convolution.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -206,6 +206,7 @@ struct convolution
        std::vector<std::size_t> new_padding;
        if(padding_mode != op::padding_mode_t::default_)
        {
+            // auto-Calculate the padding sizes with calc_dyn_auto_pad
            auto input_lens   = args[0].get_shape().lens();
            auto weights_lens = args[1].get_shape().lens();
            new_padding =
@@ -217,6 +218,7 @@ struct convolution
        }
        else
        {
+            // Use the padding that was given
            new_padding = padding;
            if(output_shape.dynamic())
            {

--- a/src/include/migraphx/op/convolution_backwards.hpp
+++ b/src/include/migraphx/op/convolution_backwards.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -164,7 +164,7 @@ struct convolution_backwards
            shape win_shape{dyn_out.computed_shape.type(), win_size};
            par_dfor(in_n, wei_c)([&](int o, int k) {
-                shape_for_each(win_shape, [&](auto idx_win) {
+                shape_for_each(win_shape, [&](const auto& idx_win) {
                    const int w = idx_win[0];
                    auto input_dims_start = idx_win.begin() + 1;