Commit b119ed8f authored by Alan Turner

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into develop

parents 26d1a969 6f1c947f
@@ -55,6 +55,7 @@ jobs:
           -DMIGRAPHX_ENABLE_GPU=On \
           -DMIGRAPHX_ENABLE_CPU=On \
           -DMIGRAPHX_ENABLE_FPGA=On \
+          -DMIGRAPHX_ENABLE_MLIR=On \
           -DBUILD_DEV=On \
           -DROCM_ENABLE_GH_ANNOTATIONS=On \
           -DCLANG_TIDY_DEPEND_ON_TARGET=Off \
@@ -169,6 +170,71 @@ jobs:
       git config --global --add safe.directory /data
       python3 tools/format.py origin/${{ github.event_name == 'pull_request' && github.base_ref || 'develop' }}
+
+  sles:
+    runs-on: ROCM-Ubuntu
+    steps:
+    - uses: actions/checkout@v3
+      with:
+        fetch-depth: 0
+    # This action saves a list of existing images; the cache is created
+    # without them in the post run. It also restores the cache if it exists.
+    - name: Docker layer cache
+      uses: jpribyl/action-docker-layer-caching@v0.1.1
+      with:
+        key: docker-layer-caching-migraphx-sles-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+        restore-keys:
+          docker-layer-caching-migraphx-sles-
+      # Ignore a failure of this step so it does not terminate the job.
+      continue-on-error: true
+    - name: Build the Docker image
+      run: docker build . --file tools/docker/sles.docker --tag migraphx-sles
+    - name: Restore cache files for ccache
+      uses: actions/cache/restore@v3
+      id: ccache_restore
+      with:
+        path: ${{ github.workspace }}/ccache
+        key: ccache-sles-${{ github.ref }}
+        restore-keys: ccache-sles-
+    - name: Build migraphx
+      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx-sles bash < {0}"
+      run: |
+        set -e
+        export CCACHE_COMPRESSLEVEL=10
+        export CCACHE_DIR=/data/ccache
+        export CCACHE_NOHASHDIR=true
+        export CCACHE_BASEDIR=/data
+        export CCACHE_MAXSIZE=1
+        mkdir build
+        cd build
+        CXX=/opt/rocm/llvm/bin/clang++ CC=/opt/rocm/llvm/bin/clang cmake \
+          -DBUILD_DEV=On \
+          -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/ccache \
+          -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/ccache \
+          ..
+        make -j$(nproc) tests driver
+    - name: Clear ccache cache before saving
+      if: ${{ steps.ccache_restore.outputs.cache-hit }}
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        set +x
+        gh extension install actions/gh-actions-cache --pin v1.0.1
+        gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
+    - name: Save cache files for ccache
+      uses: actions/cache/save@v3
+      if: always()
+      with:
+        path: ${{ github.workspace }}/ccache
+        key: ccache-sles-${{ github.ref }}
+
   pyflakes:
     runs-on: ubuntu-20.04
@@ -274,11 +340,10 @@ jobs:
         # This path is specific to Ubuntu
         path: ${{ github.workspace }}/cget
         # Look to see if there is a cache hit for the corresponding requirements file
-        key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
+        key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt', 'rbuild.ini') }}
         restore-keys: ${{ matrix.os }}-cget-4-
     - name: Install dependencies
-      if: steps.deps_cache.outputs.cache-hit != 'true'
       run: rbuild prepare -d cget -s gh
     - name: Restore cache files for ccache
...
@@ -47,6 +47,7 @@ jobs:
           onnxruntime
           dependancies
           automated
+          skip bot checks
         assignees: TedThemistokleous
         reviewers: TedThemistokleous causten
         draft: false
...
@@ -2,8 +2,49 @@
 Full documentation for MIGraphX is available at [MIGraphX Documentation](https://rocmdocs.amd.com/projects/AMDMIGraphX/en/latest/).

+## MIGraphX 2.7 for ROCm 5.7.0
+### Added
+- Enabled hipRTC so that dev packages are not required by the MIGraphX runtime, and allowed the ROCm install to be in a different directory than it was at build time
+- Added support for multi-target execution
+- Added dynamic batch support with C++/Python APIs
+- Added migraphx.create_argument to the Python API
+- Added a dockerfile example for Ubuntu 22.04
+- Added TensorFlow supported ops to the driver, similar to the existing ONNX operator list
+- Added a MIGRAPHX_TRACE_MATCHES_FOR environment variable to filter the matcher trace
+- Improved debugging by printing max, min, mean, and stddev values for TRACE_EVAL = 2
+- Used the fast_math flag instead of an environment flag for GELU
+- Printed a message from the driver if offload copy is set for a compiled program
+### Optimizations
+- Optimized for ONNX Runtime 1.14.0
+- Improved compile times by building only for the GPU on the system
+- Improved performance of pointwise/reduction kernels when using NHWC layouts
+- Loaded a specific version of the migraphx_py library
+- Annotated functions with the block size so the compiler can optimize better
+- Enabled reshape on nonstandard shapes
+- Used half HIP APIs to compute max and min
+- Added support for broadcasted scalars to the unsqueeze operator
+- Improved multiplies with the dot operator
+- Handled broadcasts across dot and concat
+- Added a verify namespace for better symbol resolution
+### Fixed
+- Resolved accuracy issues with FP16 resnet50
+- Updated the cpp generator to handle inf from float
+- Fixed an assertion error during verify and made DCE work with tuples
+- Fixed the convert operation for NaNs
+- Fixed a shape typo in an API test
+- Fixed compile warnings for shadowed variable names
+- Added a missing specialization of the hash function for `nullptr`
+### Changed
+- Bumped the version of the half library to 5.6.0
+- Bumped CI to support ROCm 5.6
+- Made building tests optional
+- Replaced np.bool with bool, as requested by NumPy
+### Removed
+- Removed int8x4 rocBLAS calls due to deprecation
+- Removed std::reduce usage since not all operating systems support it
+
 ## MIGraphX 2.5 for ROCm 5.5.0
 ### Added
 - Y-Model feature to store tuning information with the optimized model
 - Added Python 3.10 bindings
@@ -12,15 +53,11 @@ Full documentation for MIGraphX is available at [MIGraphX Documentation](https:/
 - Build support for ROCm MLIR
 - Added migraphx-driver flag to print optimizations in python (--python)
 - Added JIT implementation of the Gather and Pad operator which results in better handling of larger tensor sizes.
 ### Optimizations
 - Improved performance of Transformer based models
 - Improved performance of the Pad, Concat, Gather, and Pointwise operators
 - Improved onnx/pb file loading speed
 - Added general optimize pass which runs several passes such as simplify_reshapes/algebra and DCE in loop.
 ### Fixed
 - Improved parsing Tensorflow Protobuf files
 - Resolved various accuracy issues with some onnx models
@@ -29,6 +66,5 @@ Full documentation for MIGraphX is available at [MIGraphX Documentation](https:/
 - Use --offload-arch instead of --cuda-gpu-arch for the HIP compiler
 - Changes inside JIT to use float accumulator for large reduce ops of half type to avoid overflow.
 - Changes inside JIT to temporarily use cosine to compute sine function.
 ### Changed
 - Changed version/location of 3rd party build dependencies to pick up fixes
@@ -77,6 +77,9 @@ ADD dev-requirements.txt /dev-requirements.txt
 ADD requirements.txt /requirements.txt
 ADD rbuild.ini /rbuild.ini

+# Temporarily install a newer cmake until switching to Ubuntu 22.04
+RUN pip3 install cmake==3.22.1
+
 COPY ./tools/install_prereqs.sh /
 RUN /install_prereqs.sh /usr/local / && rm /install_prereqs.sh
 RUN test -f /usr/local/hash || exit 1
@@ -98,6 +101,9 @@ RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cma
 RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF
 RUN cget -p /opt/cmake install kitware/cmake@v3.26.4
+# Install MLIR
+ADD mlir-requirements.txt /mlir-requirements.txt
+RUN cget -p /usr/local install -f /mlir-requirements.txt

 COPY ./test/onnx/.onnxrt-commit /
@@ -113,9 +119,6 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh

-# Use the /opt/cmake install because LLVM/MLIR need cmake >= 3.20
-RUN env PATH=/opt/cmake/bin:$PATH cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@ea15b3597ce55b9088621818228595dd48fb6ec0 -DBUILD_FAT_LIBROCKCOMPILER=On
-
 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
 ENV LD_LIBRARY_PATH=$PREFIX/lib
...
@@ -114,10 +114,10 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
         cmake_build(flags: "-DCMAKE_BUILD_TYPE=release")
         stash includes: 'build/*.deb', name: 'migraphx-package'
     }
-}, hidden_symbols: rocmnode('cdna') { cmake_build ->
-    stage('Hidden symbols') {
-        cmake_build(flags: "-DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_VISIBILITY_PRESET=hidden -DCMAKE_C_VISIBILITY_PRESET=hidden")
-    }
+// }, hidden_symbols: rocmnode('cdna') { cmake_build ->
+//     stage('Hidden symbols') {
+//         cmake_build(flags: "-DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_VISIBILITY_PRESET=hidden -DCMAKE_C_VISIBILITY_PRESET=hidden")
+//     }
 }, all_targets_debug : rocmnode('cdna') { cmake_build ->
     stage('All targets Release') {
         cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On")
...
@@ -26,5 +26,5 @@ facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
 ccache@v4.1 -DENABLE_TESTING=OFF
 pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
 danmar/cppcheck@bb2711c22a0be09efe7f1a8da3030876471026c8 -DHAVE_RULES=1 # 2.11
-RadeonOpenCompute/rocm-cmake@027404a8326da6e7e9338e0b81f9428660190724 --build
+RadeonOpenCompute/rocm-cmake@189d497ed185683154ae9766393b9a10ff21201f --build
 -f requirements.txt
...
-rocm-docs-core==0.11.0
+rocm-docs-core>=0.20.0
 # Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-#
-# This file is autogenerated by pip-compile with Python 3.8
-# by the following command:
-#
-#    pip-compile requirements.in
-#
 accessible-pygments==0.0.4
     # via pydata-sphinx-theme
 alabaster==0.7.13
@@ -46,7 +21,7 @@ charset-normalizer==3.1.0
     # via requests
 click==8.1.3
     # via sphinx-external-toc
-cryptography==40.0.2
+cryptography==41.0.3
     # via pyjwt
 deprecated==1.2.13
     # via pygithub
@@ -60,22 +35,16 @@ fastjsonschema==2.16.3
     # via rocm-docs-core
 gitdb==4.0.10
     # via gitpython
-gitpython==3.1.31
+gitpython==3.1.32
     # via rocm-docs-core
 idna==3.4
     # via requests
 imagesize==1.4.1
     # via sphinx
-importlib-metadata==6.4.1
-    # via sphinx
-importlib-resources==5.12.0
-    # via rocm-docs-core
 jinja2==3.1.2
     # via
     #   myst-parser
     #   sphinx
-linkify-it-py==1.0.3
-    # via myst-parser
 markdown-it-py==2.2.0
     # via
     #   mdit-py-plugins
@@ -86,7 +55,7 @@ mdit-py-plugins==0.3.5
     # via myst-parser
 mdurl==0.1.2
     # via markdown-it-py
-myst-parser[linkify]==1.0.0
+myst-parser==1.0.0
     # via rocm-docs-core
 packaging==23.1
     # via
@@ -109,8 +78,6 @@ pyjwt[crypto]==2.6.0
     # via pygithub
 pynacl==1.5.0
     # via pygithub
-pytz==2023.3
-    # via babel
 pyyaml==6.0
     # via
     #   myst-parser
@@ -120,7 +87,7 @@ requests==2.28.2
     # via
     #   pygithub
     #   sphinx
-rocm-docs-core==0.11.0
+rocm-docs-core>=0.20.0
     # via -r requirements.in
 smmap==5.0.0
     # via gitdb
@@ -163,13 +130,7 @@ sphinxcontrib-serializinghtml==1.1.5
     # via sphinx
 typing-extensions==4.5.0
     # via pydata-sphinx-theme
-uc-micro-py==1.0.1
-    # via linkify-it-py
 urllib3==1.26.15
     # via requests
 wrapt==1.15.0
     # via deprecated
-zipp==3.15.0
-    # via
-    #   importlib-metadata
-    #   importlib-resources
@@ -54,5 +54,12 @@ ADD dev-requirements.txt /dev-requirements.txt
 ADD requirements.txt /requirements.txt
 ADD rbuild.ini /rbuild.ini

+# Temporarily install a newer cmake until switching to Ubuntu 22.04
+RUN pip3 install cmake==3.22.1
+
 COPY ./tools/install_prereqs.sh /
 RUN /install_prereqs.sh /usr/local / && rm /install_prereqs.sh
+
+# Install MLIR
+ADD mlir-requirements.txt /mlir-requirements.txt
+RUN cget -p /usr/local install -f /mlir-requirements.txt
+#####################################################################################
+# The MIT License (MIT)
+#
+# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#####################################################################################
+ROCmSoftwarePlatform/rocMLIR@3657f509bfed86bb79d5c6e24aa237e48f09f9f3 -DBUILD_FAT_LIBROCKCOMPILER=On
@@ -6,7 +6,9 @@ deps =
     -f requirements.txt

 [gh]
-ignore = danmar/cppcheck
+ignore =
+    danmar/cppcheck
+    ROCmSoftwarePlatform/rocMLIR
 deps =
     -f dev-requirements.txt
     oneapi-src/oneDNN@v1.7
...
@@ -35,6 +35,8 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS)
+
 static bool try_compute_shape(instruction_ref ins,
                               const std::vector<shape>& inputs,
                               const std::vector<module_ref>& mods)
@@ -78,14 +80,26 @@ static bool try_compute_shape(instruction_ref ins,
                 return (arg == ins) ? new_shape : arg->get_shape();
             });

-            if(not try_compute_shape(output, input_shapes, mods))
+            if(not try_compute_shape(output, input_shapes, output->module_inputs()))
             {
                 return false;
             }
         }
     }
+    catch(const std::exception& e)
+    {
+        if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+        {
+            std::cout << "Exception: " << e.what() << std::endl;
+        }
+        return false;
+    }
     catch(...)
     {
+        if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+        {
+            std::cout << "Unknown exception" << std::endl;
+        }
         return false;
     }
@@ -127,6 +141,11 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
     {
         if(arg->name() != op_name)
             continue;
+        if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
+        {
+            std::cout << "eliminate_contiguous: ";
+            m.debug_print(ins);
+        }
         auto prev = arg->inputs().front();
         replace(new_args, arg, prev);
         if(try_compute_shape(ins, new_args, mod_args))
...
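A note on the tracing hook added above: MIGRAPHX_DECLARE_ENV_VAR pairs with enabled() to gate the new debug output on an environment variable. Below is a minimal, self-contained C++ sketch of that pattern; the struct-plus-getenv expansion is an assumption for illustration, not the real macro's output.

```cpp
#include <cstdlib>
#include <iostream>
#include <string>

// Illustrative stand-in for MIGRAPHX_DECLARE_ENV_VAR: a tag type that knows
// the variable's name. The real macro in MIGraphX generates something similar.
struct MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS
{
    static const char* name() { return "MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS"; }
};

// Assumed semantics: the variable counts as enabled when set to a truthy value.
template <class EnvVar>
bool enabled(EnvVar)
{
    const char* v = std::getenv(EnvVar::name());
    return v != nullptr and std::string{v} == "1";
}

int main()
{
    if(enabled(MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS{}))
        std::cout << "tracing eliminate_contiguous\n";
    return 0;
}
```

Running the pass with the variable set (for example, MIGRAPHX_TRACE_ELIMINATE_CONTIGUOUS=1) then prints each contiguous instruction the pass considers for removal, plus any exception that aborts shape recomputation.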
@@ -26,6 +26,8 @@
 #include <algorithm>
 #include <numeric>
+#include <string>
+#include <vector>
 #include <migraphx/config.hpp>

 namespace migraphx {
...
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
...
 /*
  * The MIT License (MIT)
  *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -28,6 +28,7 @@
 #include <migraphx/shape.hpp>
 #include <cstring>
 #include <vector>
+#include <migraphx/op/normalize_attribute.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -42,6 +43,36 @@ struct select_dependent_type
 template <class T, class... Ts>
 using dependent_type = typename select_dependent_type<T, Ts...>::type;

+/**
+ * Used to normalize variable input axes at model runtime.
+ * Example: the axes input of the slice operator.
+ *
+ * \param axes the axes to normalize
+ * \param input_shape shape of the input tensor
+ * \param attr_val the normalize_axes attributes from the operator
+ * \param prefix error message prefix
+ */
+std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
+                                    const shape& input_shape,
+                                    const value& attr_val,
+                                    const std::string& prefix = "");
+
+/**
+ * Used to normalize variable input indices at model runtime.
+ * Example: the starts and ends inputs of the slice operator.
+ *
+ * \param indices the indices to normalize
+ * \param axes the axes the indices apply to
+ * \param input_shape shape of the input tensor
+ * \param attr_val the normalize_axes attributes from the operator
+ * \param prefix error message prefix
+ */
+std::vector<int64_t> normalize_indices(const std::vector<int64_t>& indices,
+                                       const std::vector<int64_t>& axes,
+                                       const shape& input_shape,
+                                       const value& attr_val,
+                                       const std::string& prefix = "");
+
 MIGRAPHX_EXPORT
 bool normalize_attributes(operation& op, const shape& input_shape);
...
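For intuition about what these helpers compute, here is a standalone sketch of the normalization rules implied by the declarations above, assuming the usual ONNX slice conventions (negative indices count from the end of the axis; out-of-range indices clip to the axis length). The helper name and signature below are illustrative, not the library's:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Sketch: normalize slice indices against the extents of the axes they apply
// to. indices[i] applies to axis axes[i] of a tensor with dimensions dims.
std::vector<int64_t> normalize_indices_sketch(std::vector<int64_t> indices,
                                              const std::vector<int64_t>& axes,
                                              const std::vector<int64_t>& dims)
{
    for(std::size_t i = 0; i < indices.size(); ++i)
    {
        int64_t dim = dims[axes[i]];
        if(indices[i] < 0)
            indices[i] += dim; // negative indices count from the end
        indices[i] = std::clamp<int64_t>(indices[i], 0, dim); // clip into range
    }
    return indices;
}

int main()
{
    // On a {6, 4} tensor: start -2 on axis 0 becomes 4; end 100 on axis 1 clips to 4.
    for(auto v : normalize_indices_sketch({-2, 100}, {0, 1}, {6, 4}))
        std::cout << v << "\n"; // prints 4 then 4
    return 0;
}
```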
@@ -27,19 +27,34 @@
 #include <migraphx/check_shapes.hpp>
 #include <migraphx/argument.hpp>
 #include <migraphx/config.hpp>
-#include <migraphx/dyn_output.hpp>
 #include <migraphx/value.hpp>
+#include <migraphx/dyn_output.hpp>
 #include <migraphx/op/normalize_attribute.hpp>
+#include <migraphx/normalize_attributes.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {

+/**
+ * Slice operator that accepts variable axes, starts and ends.
+ *
+ * Attributes:
+ *   axes: constant axes to slice over (optional)
+ *   starts: constant slice starting indices (optional)
+ *   ends: constant slice ending indices (optional)
+ *
+ * Parameters:
+ *   data: the input tensor to slice (dynamic or static shape)
+ *   input_starts: starting indices of the slice (optional, static shape)
+ *   input_ends: ending indices of the slice (optional, static shape)
+ *   input_axes: axes to slice over (optional, static shape)
+ */
 struct slice
 {
-    std::vector<int64_t> axes;
-    std::vector<int64_t> starts;
-    std::vector<int64_t> ends;
+    std::vector<int64_t> axes{};
+    std::vector<int64_t> starts{};
+    std::vector<int64_t> ends{};

     template <class Self, class F>
     static auto reflect(Self& self, F f)
@@ -48,8 +63,8 @@ struct slice
     }

     /**
-     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are in
-     * limits.
+     * Ensure that attribute vectors axes, starts, and ends are all the same size and values are
+     * within limits.
      */
     value attributes() const
     {
@@ -70,6 +85,90 @@ struct slice

     std::string name() const { return "slice"; }

+    /**
+     * Computes the slice output shape dimensions for the given starts, ends, and axes.
+     * Templated to also handle tensor views.
+     * [in_starts, in_ends] and [in_axes] may have different types when in_axes is this
+     * object's axes attribute. Assumes in_starts and in_ends are normalized and in_axes are valid.
+     */
+    template <class A, class B>
+    std::vector<std::size_t>
+    lens_calc(const std::vector<std::size_t>& lengths, A in_starts, A in_ends, B in_axes) const
+    {
+        auto new_lens = lengths;
+        for(std::size_t i = 0; i < in_axes.size(); ++i)
+        {
+            auto axis      = in_axes[i];
+            new_lens[axis] = in_ends[i] - in_starts[i];
+        }
+        return new_lens;
+    }
+
+    shape normalize_compute_shape(std::vector<shape> inputs) const
+    {
+        check_shapes{inputs, *this, true}.has(1, 3, 4);
+        auto input_shape = inputs[0];
+        if(inputs.size() == 1)
+        {
+            auto t = input_shape.type();
+            if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
+                   return not input_shape.dyn_dims()[axis].is_fixed();
+               }))
+            {
+                MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis");
+            }
+            if(input_shape.dynamic())
+            {
+                return shape{t,
+                             lens_calc(input_shape.min_lens(), starts, ends, axes),
+                             lens_calc(input_shape.max_lens(), starts, ends, axes),
+                             {}};
+            }
+            else
+            {
+                return shape{
+                    t, lens_calc(input_shape.lens(), starts, ends, axes), input_shape.strides()};
+            }
+        }
+        else
+        {
+            // check that starts, ends, and optionally input_axes are all 1D, have the same
+            // dimension, and are static
+            check_shapes{inputs.begin() + 1,
+                         inputs.end(),
+                         std::string("SLICE: inputs (starts, ends, and input_axes)"),
+                         false}
+                .only_dims(1)
+                .same_dims();
+            auto dds = input_shape.to_dynamic().dyn_dims();
+            if(inputs.size() == 3)
+            {
+                if(inputs[1].lens().at(0) != axes.size())
+                {
+                    MIGRAPHX_THROW("SLICE: inputs starts and ends do not have the same dimension "
+                                   "as the axes attribute");
+                }
+                std::for_each(axes.cbegin(), axes.cend(), [&](const auto& axis) {
+                    dds.at(axis) = {0, dds.at(axis).max};
+                });
+            }
+            else
+            {
+                // if axes is an input, then all the output dimensions could be 0 to the max value
+                std::transform(dds.begin(), dds.end(), dds.begin(), [](auto dd) {
+                    return shape::dynamic_dimension{0, dd.max};
+                });
+            }
+            return shape{input_shape.type(), dds};
+        }
+    }
+
+    /**
+     * Calculates the starting offset for the sliced tensor.
+     * Used in compute when only the data input is given and all other information is in the
+     * attributes.
+     *
+     * \param s static input shape
+     */
     auto compute_offset(const shape& s) const
     {
         const std::vector<std::size_t>& lens = s.lens();
@@ -90,80 +189,131 @@ struct slice
                 offset += starts[axis] * strides[axis];
             }
         }
-        return offset;
+        return offset * s.type_size();
     }

-    shape normalize_compute_shape(std::vector<shape> inputs) const
-    {
-        check_shapes{inputs, *this, true}.has(1);
-        auto input_shape = inputs[0];
-        auto t           = input_shape.type();
-        // TODO: When support for dynamic shapes is added to normalize_attributes,
-        // remove this restriction.
-        if(input_shape.dynamic() and std::any_of(axes.begin(), axes.end(), [&](auto axis) {
-               return not input_shape.dyn_dims()[axis].is_fixed();
-           }))
-        {
-            MIGRAPHX_THROW("SLICE: slicing is not allowed on non-fixed dynamic input axis ");
-        }
-
-        // For a static shape, old_lens will be adjusted to a new size
-        // for those axes that are sliced.
-        // For dynamic shape, the adjusted old_lens become the new max values,
-        // while updating the old mins and optimals if possible.
-        std::vector<std::size_t> new_mins;
-        std::vector<std::size_t> old_lens;
-        std::vector<std::size_t> old_strides;
-        // Doesn't handle optimals
-        if(input_shape.dynamic())
-        {
-            old_lens = input_shape.max_lens();
-            new_mins = input_shape.min_lens();
-        }
-        else
-        {
-            old_lens = input_shape.lens();
-            // For static shape (including during eval step after a dynamic input) the strides are
-            // indexed into the pre-slice array, so they are larger than the apparent size of the
-            // resulting shape.
-            old_strides = input_shape.strides();
-        }
-
-        std::vector<std::size_t> new_lens = old_lens;
-        for(std::size_t i = 0; i < axes.size(); i++)
-        {
-            auto axis            = axes[i];
-            size_t sliced_length = ends[i] - starts[i];
-            // A Numpy indexing convention: a slice size larger than the actual dimension
-            // is legal and the "ends" value is clipped to the axis size
-            new_lens[axis] = std::min(new_lens[axis], sliced_length);
-            if(input_shape.dynamic())
-            {
-                // TODO: when non-fixed shape slicing is allowed, this will be different than
-                // sliced_length, making use of TBD start/end values.
-                std::size_t sliced_min_length = ends[i] - starts[i];
-                // if the slice size is smaller than maxes but larger than mins
-                new_mins[axis] = std::min(sliced_min_length, new_mins[axis]);
-            }
-        }
-        if(input_shape.dynamic())
-        {
-            return shape{t, new_mins, new_lens, {}};
-        }
-        else
-        {
-            return shape{t, new_lens, old_strides};
-        }
-    }
-
-    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
-    {
-        auto input  = args[0];
-        auto offset = compute_offset(input.get_shape()) * dyn_out.computed_shape.type_size();
-        return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
-    }
+    /**
+     * Calculates the starting offset for the sliced tensor (for aliasing).
+     * Used when the starts and/or the axes are inputs.
+     *
+     * \param s static input shape
+     * \param input_starts starting indices of the slice
+     * \param ax_vec axes to slice on
+     */
+    template <class IndView, class Axes>
+    auto compute_offset(const shape& s, const IndView& input_starts, const Axes& ax_vec) const
+    {
+        auto ret = 0;
+        for(std::size_t i = 0; i < ax_vec.size(); ++i)
+        {
+            auto axis = ax_vec[i];
+            ret += input_starts[i] * s.strides().at(axis);
+        }
+        return ret * s.type_size();
+    }
+
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(const shape& input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   this->axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input_ends")}};
+    }
+
+    /**
+     * Three-input version of normalize_inputs.
+     * This one also checks that the input_axes are valid.
+     */
+    std::unordered_map<std::string, std::vector<int64_t>>
+    normalize_inputs(shape input_shape,
+                     const std::vector<int64_t>& input_starts,
+                     const std::vector<int64_t>& input_ends,
+                     const std::vector<int64_t>& input_axes) const
+    {
+        auto attrs = this->attributes().at("normalize_axes");
+        auto norm_axes =
+            normalize_axes(input_axes, input_shape, attrs.at("axes"), "Slice variable input_axes");
+        return {{"input_starts",
+                 normalize_indices(input_starts,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("starts"),
+                                   "Slice variable input_starts")},
+                {"input_ends",
+                 normalize_indices(input_ends,
+                                   norm_axes,
+                                   input_shape,
+                                   attrs.at("ends"),
+                                   "Slice variable input_ends")},
+                {"input_axes", norm_axes}};
+    }
+
+    argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
+    {
+        auto input       = args[0];
+        auto input_shape = input.get_shape();
+        switch(args.size())
+        {
+        case 1: {
+            std::size_t offset = compute_offset(input_shape);
+            return {dyn_out.computed_shape, [=] { return input.data() + offset; }};
+        }
+        case 3: {
+            shape calc_shape;
+            std::size_t offset = 0;
+            visit_all(args[1], args[2])([&](auto input_starts, auto input_ends) {
+                auto norm_inputs = normalize_inputs(input_shape,
+                                                    input_starts.template to_vector<int64_t>(),
+                                                    input_ends.template to_vector<int64_t>());
+                offset = compute_offset(input_shape, norm_inputs.at("input_starts"), this->axes);
+                calc_shape = {input_shape.type(),
+                              lens_calc(input_shape.lens(),
+                                        norm_inputs.at("input_starts"),
+                                        norm_inputs.at("input_ends"),
+                                        this->axes),
+                              input_shape.strides()};
+            });
+            return {calc_shape, [=] { return input.data() + offset; }};
+        }
+        case 4: {
+            shape calc_shape;
+            std::size_t offset = 0;
+            visit_all(args[1], args[2], args[3])(
+                [&](auto input_starts, auto input_ends, auto input_axes) {
+                    auto norm_inputs = normalize_inputs(input_shape,
+                                                        input_starts.template to_vector<int64_t>(),
+                                                        input_ends.template to_vector<int64_t>(),
+                                                        input_axes.template to_vector<int64_t>());
+                    offset = compute_offset(
+                        input_shape, norm_inputs.at("input_starts"), norm_inputs.at("input_axes"));
+                    calc_shape = shape{input_shape.type(),
+                                       lens_calc(input_shape.lens(),
+                                                 norm_inputs.at("input_starts"),
+                                                 norm_inputs.at("input_ends"),
+                                                 norm_inputs.at("input_axes")),
+                                       input_shape.strides()};
+                });
+            return {calc_shape, [=] { return input.data() + offset; }};
+        }
+        default: {
+            // Should never get here; guards against future code changes
+            MIGRAPHX_THROW("SLICE: invalid number of inputs");
+        }
+        }
+    }

     std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 0; }
 };
...
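The reworked operator can be driven either through its attributes or through extra runtime inputs. A hedged usage sketch with MIGraphX's internal C++ API follows; make_op, add_parameter, and add_instruction are real entry points used elsewhere in this diff, but the exact program-building sequence here is an assumption for illustration:

```cpp
#include <migraphx/program.hpp>
#include <migraphx/module.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/shape.hpp>

migraphx::program build_slice_example()
{
    migraphx::program p;
    auto* mm = p.get_main_module();

    // Static form: axes/starts/ends are attributes, so the output shape is
    // fully known. With axes {0}, starts {1} on a {4, 6} float tensor
    // (strides {6, 1}): offset = 1 * 6 = 6 elements, i.e. 24 bytes, matching
    // the compute_offset arithmetic shown above.
    auto x = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {4, 6}});
    mm->add_instruction(
        migraphx::make_op("slice", {{"axes", {0}}, {"starts", {1}}, {"ends", {3}}}), x);

    // Variable form: starts and ends arrive as inputs at run time; per the
    // shape computation above, the sliced dimension stays dynamic ({0, max})
    // until compute normalizes the runtime indices and aliases the input.
    auto starts = mm->add_parameter("starts", migraphx::shape{migraphx::shape::int64_type, {1}});
    auto ends   = mm->add_parameter("ends", migraphx::shape{migraphx::shape::int64_type, {1}});
    mm->add_instruction(migraphx::make_op("slice", {{"axes", {0}}}), x, starts, ends);
    return p;
}
```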
@@ -23,9 +23,9 @@
  */
 #include <migraphx/memory_coloring.hpp>
 #include <migraphx/module.hpp>
-#include <migraphx/operators.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/iterator_for.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/functional.hpp>
 #include <migraphx/algorithm.hpp>
 #include <migraphx/ranges.hpp>
@@ -382,7 +382,8 @@ void memory_coloring::apply(module& m) const
         auto s             = ins->get_shape();
         std::size_t offset = seg.first * alignment;
         assert(offset < n);
-        m.replace_instruction(ins, op::load{s, offset}, mem);
+        m.replace_instruction(
+            ins, make_op("load", {{"shape", to_value(s)}, {"offset", offset}}), mem);
     }

     // Replace zero allocation
@@ -391,7 +392,8 @@ void memory_coloring::apply(module& m) const
         if(ins->name() != allocation_op)
             continue;
         assert(ins->get_shape().bytes() == 0);
-        m.replace_instruction(ins, op::load{ins->get_shape(), 0}, mem);
+        m.replace_instruction(
+            ins, make_op("load", {{"shape", to_value(ins->get_shape())}, {"offset", 0}}), mem);
     }

     // Remove scratch parameter if its not used
...
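The load replacement above swaps the concrete op::load struct for a string-keyed builder, which is why operators.hpp could be dropped from the includes. A sketch of the equivalence (to_value comes from the MIGraphX serialization utilities; treat the exact include set as an assumption):

```cpp
#include <migraphx/make_op.hpp>
#include <migraphx/op/load.hpp>
#include <migraphx/serialize.hpp>

// Both functions construct the same operation. The make_op form needs only
// the operation's registered name and its serialized attributes, so a pass
// using it no longer pulls every operator definition into its compile.
migraphx::operation load_via_struct(const migraphx::shape& s, std::size_t offset)
{
    return migraphx::op::load{s, offset};
}

migraphx::operation load_via_make_op(const migraphx::shape& s, std::size_t offset)
{
    return migraphx::make_op("load",
                             {{"shape", migraphx::to_value(s)}, {"offset", offset}});
}
```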
@@ -49,6 +49,10 @@ auto tune_attribute(const std::vector<int64_t>& vec,
                     Message m)
 {
     std::vector<int64_t> result(vec);
+    if(result.empty())
+    {
+        return result;
+    }
     int64_t n_rank = input_shape.ndim();
     std::vector<op::normalize_attribute> vec_attrs = val.to_vector<op::normalize_attribute>();
     if(contains(vec_attrs, op::normalize_attribute::use_output))
@@ -251,5 +255,22 @@ bool normalize_attributes(operation& op, const shape& input_shape)
     return tuned;
 }

+std::vector<int64_t> normalize_axes(const std::vector<int64_t>& axes,
+                                    const shape& input_shape,
+                                    const value& attr_val,
+                                    const std::string& prefix)
+{
+    return tune_attribute(axes, {}, attr_val, input_shape, [&] { return prefix; });
+}
+
+std::vector<int64_t> normalize_indices(const std::vector<int64_t>& indices,
+                                       const std::vector<int64_t>& axes,
+                                       const shape& input_shape,
+                                       const value& attr_val,
+                                       const std::string& prefix)
+{
+    return tune_attribute(indices, axes, attr_val, input_shape, [&] { return prefix; });
+}
+
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx
@@ -117,6 +117,7 @@ struct onnx_parser
     parse_graph(module* mod, const onnx::GraphProto& graph, bool inlining = false);
     literal parse_value(const onnx::AttributeProto& attr) const;
     literal parse_tensor(const onnx::TensorProto& t) const;
+    shape parse_type(const onnx::TypeProto& t) const;
     shape parse_type(const onnx::TypeProto& t, const std::vector<std::size_t>& input_dims) const;
 };
...
@@ -357,10 +357,9 @@ parse_inputs(const onnx_parser& parser,
             }

             shape s;
-            std::vector<std::size_t> dims;
             if(parser.map_input_dims.count(name) > 0)
             {
-                dims = parser.map_input_dims.at(name);
+                std::vector<std::size_t> dims = parser.map_input_dims.at(name);
                 s = parser.parse_type(input.type(), dims);
             }
             else if(parser.map_dyn_input_dims.count(name) > 0)
@@ -370,7 +369,7 @@ parse_inputs(const onnx_parser& parser,
             }
             else
             {
-                s = parser.parse_type(input.type(), dims);
+                s = parser.parse_type(input.type());
             }
             mod_insts[name] = mod->add_parameter(name, s);
         }
@@ -553,14 +552,9 @@ literal onnx_parser::parse_tensor(const onnx::TensorProto& t) const
     }
     MIGRAPHX_THROW("PARSE_TENSOR: Invalid tensor type");
 }

-shape onnx_parser::parse_type(const onnx::TypeProto& t,
-                              const std::vector<std::size_t>& input_dims) const
+shape onnx_parser::parse_type(const onnx::TypeProto& t) const
 {
     shape::type_t shape_type = get_type(t.tensor_type().elem_type());
-    if(not input_dims.empty())
-    {
-        return {shape_type, input_dims};
-    }

     std::vector<shape::dynamic_dimension> dynamic_dims;
     auto&& tensor_dims = t.tensor_type().shape().dim();
@@ -590,6 +584,15 @@ shape onnx_parser::parse_type(const onnx::TypeProto& t,
     return shape_from_dyn_dims(shape_type, dynamic_dims);
 }

+shape onnx_parser::parse_type(const onnx::TypeProto& t,
+                              const std::vector<std::size_t>& input_dims) const
+{
+    shape::type_t shape_type = get_type(t.tensor_type().elem_type());
+    if(input_dims.empty())
+        return {shape_type};
+    return {shape_type, input_dims};
+}
+
 shape::type_t get_type(int dtype)
 {
     switch(dtype)
...