Merge branch 'develop' into ck-flash-attn

a045fb19 · Alan Turner · 135eb63e · 434a06cf · a045fb19 · a045fb19
Commit a045fb19 authored Sep 27, 2023 by Alan Turner
20 changed files
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -8,6 +8,12 @@ on:
      - master
      - 'release/**'
+env:
+  DOCKER_USER: ${{secrets.DOCKERHUB_USERID}}
+  DOCKER_TOKEN: ${{secrets.DOCKERHUB_TOKEN}}
+  DOCKER_IMAGE_UBUNTU: "rocm/migraphx-ci-ubuntu"
+  DOCKER_IMAGE_SLES: "rocm/migraphx-ci-sles"
 jobs:
  cancel:
@@ -17,22 +23,102 @@ jobs:
        uses: styfle/cancel-workflow-action@0.11.0
        with:
          access_token: ${{ github.token }}
-  tidy:
+  # Computes content-hash based tags for the Ubuntu and SLES CI images and
+  # reports, through job outputs, whether each tagged image can already be
+  # found with `docker manifest inspect`. Downstream jobs read these outputs
+  # via `needs.check_image.outputs.*`.
+  check_image:
+    name: Check if image exists in registry
+    runs-on: ubuntu-latest
+    outputs:
+      imageexists:  ${{ steps.check_image.outputs.imageexists }}
+      imagetag:  ${{ steps.image_hash.outputs.imagetag }}
+      imageexists_sles:  ${{ steps.check_image.outputs.imageexists_sles }}
+      imagetag_sles:  ${{ steps.image_hash.outputs.imagetag_sles }}
+    steps: 
+      - name: Checkout Code
+        uses: actions/checkout@v3
+      # The tag is "hip-clang-" followed by a hash of the files that define
+      # the image contents, so changing any of those files yields a new tag.
+      # Note that the SLES tag also uses the "hip-clang-" prefix.
+      - name: Create Image Tag
+        id: image_hash
+        run: |
+          echo "imagetag=hip-clang-${{hashFiles('**/hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', '**/rbuild.ini')}}" >> $GITHUB_OUTPUT
+          echo "imagetag_sles=hip-clang-${{hashFiles('**/tools/docker/sles.docker', '**/*requirements.txt', '**/install_prereqs.sh', '**/rbuild.ini')}}" >> $GITHUB_OUTPUT
+      # An empty result from `docker manifest inspect` (stderr is discarded)
+      # is treated as "image does not exist"; any non-empty output counts as
+      # "exists". Both outputs are always written, as 'true' or 'false'.
+      - name: Check if image is built already
+        id: check_image
+        env:
+          DOCKER_TAG_UBUNTU: ${{ steps.image_hash.outputs.imagetag }}
+          DOCKER_TAG_SLES: ${{ steps.image_hash.outputs.imagetag_sles }}
+        run: |
+          if [[ "$(docker manifest inspect $DOCKER_IMAGE_UBUNTU:$DOCKER_TAG_UBUNTU 2> /dev/null)" != "" ]]; then
+            echo "imageexists=true" >> $GITHUB_OUTPUT
+            echo "Image already exists, skip building available"
+          else
+            echo "imageexists=false" >> $GITHUB_OUTPUT
+            echo "Tag does not exist, build and publishing required"
+          fi
+          if [[ "$(docker manifest inspect $DOCKER_IMAGE_SLES:$DOCKER_TAG_SLES 2> /dev/null)" != "" ]]; then
+            echo "imageexists_sles=true" >> $GITHUB_OUTPUT
+            echo "SLES Image already exists, skip building available"
+          else
+            echo "imageexists_sles=false" >> $GITHUB_OUTPUT
+            echo "SLES Tag does not exist, build and publishing required"
+          fi
+  build_image:
+    name: Build image
    runs-on: ROCM-Ubuntu
+    needs: check_image
+    if: ${{ needs.check_image.outputs.imageexists != 'true' }}
    steps:
    - uses: actions/checkout@v3
-    # In this step, this action saves a list of existing images,
+    - name: Build and publish 
-    # the cache is created without them in the post run.
+      env:            
-    # It also restores the cache if it exists.
+        DOCKER_TAG_UBUNTU: ${{ needs.check_image.outputs.imagetag }}
-    - name: Docker layer cache
+      run: |
-      uses: jpribyl/action-docker-layer-caching@v0.1.1
+        # The TOKEN and USERID are github secrets, Action failures at this step
-      with:
+        # can come from a PR from a fork changing a file which forces a rebuild
-        key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+        # Resolve by making an internal PR of the Forked PR
-        restore-keys:
+        echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
-          docker-layer-caching-migraphx-
-      # Ignore the failure of a step and avoid terminating the job.
+        docker pull $DOCKER_IMAGE_UBUNTU:latest || true
-      continue-on-error: true
+        docker build . --file hip-clang.docker --cache-from $DOCKER_IMAGE_UBUNTU:latest --tag $DOCKER_IMAGE_UBUNTU:$DOCKER_TAG_UBUNTU --tag $DOCKER_IMAGE_UBUNTU:latest;
+        docker push $DOCKER_IMAGE_UBUNTU:$DOCKER_TAG_UBUNTU;
+        docker push $DOCKER_IMAGE_UBUNTU:latest;
+  build_SLES_image:
+    name: Build SLES image
+    runs-on: ROCM-Ubuntu
+    needs: check_image
+    if: ${{ needs.check_image.outputs.imageexists_sles != 'true' }}
+    steps:
+    - uses: actions/checkout@v3
+    - name: Build and publish SLES
+      env:            
+        DOCKER_TAG_SLES: ${{ needs.check_image.outputs.imagetag_sles }}
+      run: |
+        # The TOKEN and USERID are github secrets, Action failures at this step
+        # can come from a PR from a fork changing a file which forces a rebuild
+        # Resolve by making an internal PR of the Forked PR
+        echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
+        docker pull $DOCKER_IMAGE_SLES:latest || true
+        docker build . --file ./tools/docker/sles.docker --cache-from $DOCKER_IMAGE_SLES:latest --tag $DOCKER_IMAGE_SLES:$DOCKER_TAG_SLES --tag $DOCKER_IMAGE_SLES:latest;
+        docker push $DOCKER_IMAGE_SLES:$DOCKER_TAG_SLES;
+        docker push $DOCKER_IMAGE_SLES:latest;
+  tidy:
+    runs-on: ROCM-Ubuntu
+    needs: [ build_image, check_image ]
+    env:
+      DOCKER_TAG_UBUNTU: ${{ needs.check_image.outputs.imagetag }}
+    if: ${{ !cancelled() && (needs.build_image.result == 'success' || needs.build_image.result == 'skipped') }}
+    steps: 
+    - uses: actions/checkout@v3
    - name: Restore cache files for tidy
      uses: actions/cache/restore@v3 
@@ -42,12 +128,8 @@ jobs:
        key: tidy-cache-${{ github.ref }}
        restore-keys: tidy-cache-
-    - name: Build the Docker image
+    - name: Clang Tidy
-      run: |
+      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKER_IMAGE_UBUNTU:$DOCKER_TAG_UBUNTU bash < {0}"
-        docker build . --file hip-clang.docker --tag migraphx
-    - name: Clang tidy
-      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx bash < {0}"
      run: |
        mkdir build
        cd build
@@ -65,6 +147,7 @@ jobs:
    # GH actions can not update existing cache, as a workaround clear cache and then save it
    - name: Clear tidy cache before saving
+      continue-on-error: true
      if: ${{ steps.tidy_restore.outputs.cache-hit }}
      shell: bash
      env:
@@ -72,7 +155,6 @@ jobs:
      run: |
        gh extension install actions/gh-actions-cache --pin v1.0.1
        gh actions-cache delete ${{ steps.tidy_restore.outputs.cache-matched-key }} --confirm
-      continue-on-error: true
    - name: Save cache files for tidy
      uses: actions/cache/save@v3 
@@ -84,21 +166,14 @@ jobs:
  cppcheck:
    runs-on: ROCM-Ubuntu
+    needs: [ build_image, check_image ]
+    env:
+      DOCKER_TAG_UBUNTU: ${{ needs.check_image.outputs.imagetag }}
+    if: ${{ !cancelled() && (needs.build_image.result == 'success' || needs.build_image.result == 'skipped') }}
    steps:
    - uses: actions/checkout@v3
-    # In this step, this action saves a list of existing images,
-    # the cache is created without them in the post run.
-    # It also restores the cache if it exists.
-    - name: Docker layer cache
-      uses: jpribyl/action-docker-layer-caching@v0.1.1
-      with:
-        key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
-        restore-keys:
-          docker-layer-caching-migraphx-
-      # Ignore the failure of a step and avoid terminating the job.
-      continue-on-error: true
    - name: Restore cache files for cppcheck
      id: cppcheck_restore
      uses: actions/cache/restore@v3
@@ -107,11 +182,8 @@ jobs:
        key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
        restore-keys: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-
-    - name: Build the Docker image
-      run: docker build . --file hip-clang.docker --tag migraphx
    - name: Cppcheck
-      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx bash < {0}"
+      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKER_IMAGE_UBUNTU:$DOCKER_TAG_UBUNTU bash < {0}"
      run: |
        mkdir build
        cd build
@@ -124,6 +196,7 @@ jobs:
    # GH actions can not update existing cache, as a workaround clear cache and then save it
    - name: Clear cppcheck cache before saving
+      continue-on-error: true
      if: ${{ steps.cppcheck_restore.outputs.cache-hit }}
      shell: bash
      env:
@@ -131,7 +204,6 @@ jobs:
      run: |
        gh extension install actions/gh-actions-cache --pin v1.0.1
        gh actions-cache delete ${{ steps.cppcheck_restore.outputs.cache-matched-key }} --confirm
-      continue-on-error: true
    - name: Save cache files for cppcheck
      uses: actions/cache/save@v3
@@ -142,29 +214,30 @@ jobs:
  format:
-    runs-on: ROCM-Ubuntu
+    runs-on: ubuntu-latest
+    needs: [ build_image, check_image ]
+    env:
+      DOCKER_TAG_UBUNTU: ${{ needs.check_image.outputs.imagetag }}
+    if: ${{ !cancelled() && (needs.build_image.result == 'success' || needs.build_image.result == 'skipped') }}
    steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: 0
-    # In this step, this action saves a list of existing images,
+    - name: Free space
-    # the cache is created without them in the post run.
+      uses: jlumbroso/free-disk-space@main
-    # It also restores the cache if it exists.
-    - name: Docker layer cache
-      uses: jpribyl/action-docker-layer-caching@v0.1.1
      with:
-        key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+        tool-cache: true
-        restore-keys:
+        android: true
-          docker-layer-caching-migraphx-
+        dotnet: true
-      # Ignore the failure of a step and avoid terminating the job.
+        haskell: true
-      continue-on-error: true
+        large-packages: true
+        swap-storage: true
-    - name: Build the Docker image
+        docker-images: true
-      run: docker build . --file hip-clang.docker --tag migraphx
    - name: Check formatting
-      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx bash < {0}"
+      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKER_IMAGE_UBUNTU:$DOCKER_TAG_UBUNTU bash < {0}"
      run: |
        set -e
        git config --global --add safe.directory /data
@@ -172,26 +245,16 @@ jobs:
  sles:
    runs-on: ROCM-Ubuntu
+    needs: [ build_SLES_image, check_image ]
+    env:
+      DOCKER_TAG_SLES: ${{ needs.check_image.outputs.imagetag_sles }}
+    if: ${{ !cancelled() && (needs.build_SLES_image.result == 'success' || needs.build_SLES_image.result == 'skipped') }}      
    steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: 0
-    # In this step, this action saves a list of existing images,
-    # the cache is created without them in the post run.
-    # It also restores the cache if it exists.
-    - name: Docker layer cache
-      uses: jpribyl/action-docker-layer-caching@v0.1.1
-      with:
-        key: docker-layer-caching-migraphx-sles-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
-        restore-keys:
-          docker-layer-caching-migraphx-sles-
-      # Ignore the failure of a step and avoid terminating the job.
-      continue-on-error: true
-    - name: Build the Docker image
-      run: docker build . --file tools/docker/sles.docker --tag migraphx-sles
    - name: Restore cache files for ccache
      uses: actions/cache/restore@v3 
      id: ccache_restore
@@ -201,7 +264,7 @@ jobs:
        restore-keys: ccache-sles-
    - name: Build migraphx
-      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx-sles bash < {0}"
+      shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKER_IMAGE_SLES:$DOCKER_TAG_SLES bash < {0}"
      run: |
        set -e
        export CCACHE_COMPRESSLEVEL=10
@@ -212,6 +275,7 @@ jobs:
        mkdir build
        cd build
        CXX=/opt/rocm/llvm/bin/clang++ CC=/opt/rocm/llvm/bin/clang cmake \
+          -DMIGRAPHX_DISABLE_LARGE_BUFFER_TESTS=On \
          -DBUILD_DEV=On \
          -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/ccache \
          -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/ccache \
@@ -219,12 +283,12 @@ jobs:
        make -j$(nproc) tests driver
    - name: Clear ccache cache before saving
+      continue-on-error: true
      if: ${{ steps.ccache_restore.outputs.cache-hit }}
      shell: bash
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
-        set +x
        gh extension install actions/gh-actions-cache --pin v1.0.1
        gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
@@ -365,6 +429,7 @@ jobs:
        rbuild build -d cget -s gh -T check \
          -DCMAKE_BUILD_TYPE=${{matrix.configuration}} \
          -DMIGRAPHX_ENABLE_PYTHON=${{matrix.configuration == 'release' && 'On' || 'Off'}} \
+          -DMIGRAPHX_DISABLE_LARGE_BUFFER_TESTS=On \
          -DBUILD_DEV=On \
          -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Os -fdebug-prefix-map=$PWD=. -fdebug-types-section -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=undefined" \
          -DCMAKE_CXX_FLAGS_CODECOV="-g1 -Og -fdebug-prefix-map=$PWD=. -fdebug-types-section -fprofile-arcs -ftest-coverage -fno-omit-frame-pointer" \
@@ -374,12 +439,12 @@ jobs:
    # GH actions can not update existing cache, as a workaround clear cache and then save it
    - name: Clear ccache cache before saving
+      continue-on-error: true
      if: ${{ steps.ccache_restore.outputs.cache-hit }}
      shell: bash
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
-        set +x
        gh extension install actions/gh-actions-cache --pin v1.0.1
        gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
@@ -481,6 +546,7 @@ jobs:
        rbuild build -d cget -s gh -T check \
          -DCMAKE_BUILD_TYPE=${{matrix.configuration}} \
          -DMIGRAPHX_ENABLE_PYTHON=${{matrix.configuration == 'release' && 'On' || 'Off'}} \
+          -DMIGRAPHX_DISABLE_LARGE_BUFFER_TESTS=On \
          -DBUILD_DEV=On \
          -DCMAKE_CXX_FLAGS_DEBUG="-g1 -Os -fdebug-prefix-map=$PWD=. -fdebug-types-section -fno-omit-frame-pointer -fsanitize=undefined -fno-sanitize-recover=undefined" \
          -DCMAKE_CXX_FLAGS_CODECOV="-g1 -Og -fdebug-prefix-map=$PWD=. -fdebug-types-section -fprofile-arcs -ftest-coverage -fno-omit-frame-pointer" \
@@ -491,15 +557,14 @@ jobs:
    # this is a workaround, with GH actions can not update existing cache
    - name: Clear ccache cache before saving
+      continue-on-error: true
      if: ${{ steps.ccache_restore_fpga.outputs.cache-hit }}
      shell: bash
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
-        set +x
        gh extension install actions/gh-actions-cache
        gh actions-cache delete ${{ steps.ccache_restore_fpga.outputs.cache-matched-key }} --confirm
-      continue-on-error: true
    - name: Save cache files for ccache
      uses: actions/cache/save@v3 

--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -9,6 +9,10 @@ sphinx:
 formats: [htmlzip]
 python:
-   version: "3.8"
   install:
   - requirements: docs/.sphinx/requirements.txt
+build:
+   os: ubuntu-20.04
+   tools:
+      python: "3.8"
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -262,6 +262,7 @@ rocm_enable_cppcheck(
 enable_testing()
 include(ROCMCreatePackage)
+include(ROCMTest)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/lib)
@@ -269,6 +270,7 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
 add_subdirectory(src)
 add_subdirectory(docs)
 if(BUILD_TESTING)
+    rocm_enable_test_package(migraphx)
    add_subdirectory(test)
 endif()
 add_subdirectory(tools)

--- a/cmake/PythonModules.cmake
+++ b/cmake/PythonModules.cmake
@@ -86,7 +86,7 @@ function(py_add_module NAME)
    )
 endfunction()
-set(PYTHON_SEARCH_VERSIONS 2.7 3.5 3.6 3.7 3.8 3.9 3.10)
+set(PYTHON_SEARCH_VERSIONS 3.5 3.6 3.7 3.8 3.9 3.10)
 set(PYTHON_DISABLE_VERSIONS "" CACHE STRING "")
 foreach(PYTHON_DISABLE_VERSION ${PYTHON_DISABLE_VERSIONS})
    list(REMOVE_ITEM PYTHON_SEARCH_VERSIONS ${PYTHON_DISABLE_VERSION})

--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -26,5 +26,5 @@ facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
 ccache@v4.1 -DENABLE_TESTING=OFF
 pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
 danmar/cppcheck@bb2711c22a0be09efe7f1a8da3030876471026c8 -DHAVE_RULES=1 # 2.11
-RadeonOpenCompute/rocm-cmake@189d497ed185683154ae9766393b9a10ff21201f --build
+RadeonOpenCompute/rocm-cmake@5a34e72d9f113eb5d028e740c2def1f944619595 --build
 -f requirements.txt
--- a/mlir-requirements.txt
+++ b/mlir-requirements.txt
@@ -21,4 +21,4 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 #####################################################################################
-ROCmSoftwarePlatform/rocMLIR@3657f509bfed86bb79d5c6e24aa237e48f09f9f3 -DBUILD_FAT_LIBROCKCOMPILER=On
+ROCmSoftwarePlatform/rocMLIR@2c519c48eaa278d13e6c40bc0941119826d71512 -DBUILD_FAT_LIBROCKCOMPILER=On
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
 #####################################################################################
 # The MIT License (MIT)
 #
-# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+# Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -29,6 +29,7 @@ include(ROCMPackageConfigHelpers)
 include(RegisterOp)
 include(CheckCXXLinkerFlag)
 add_library(migraphx 
    adjust_allocation.cpp
    analyze_streams.cpp
@@ -36,6 +37,7 @@ add_library(migraphx
    argument.cpp
    auto_contiguous.cpp
    common.cpp
+    common_dims.cpp
    compile_src.cpp
    convert_to_json.cpp
    cpp_generator.cpp
@@ -94,6 +96,7 @@ add_library(migraphx
    serialize.cpp
    shape.cpp
    simplify_algebra.cpp
+    simplify_dyn_ops.cpp
    simplify_reshapes.cpp
    split_single_dyn_dim.cpp
    target.cpp
@@ -140,6 +143,7 @@ register_migraphx_ops(
    equal
    erf
    exp
+    fill
    flatten
    floor
    fmod
@@ -183,6 +187,8 @@ register_migraphx_ops(
    quant_convolution
    quant_dot
    quantizelinear
+    random_uniform
+    random_seed
    recip
    reduce_max
    reduce_mean

--- a/src/common_dims.cpp
+++ b/src/common_dims.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <migraphx/common_dims.hpp>
+#include <migraphx/ranges.hpp>
+#include <algorithm>
+#include <cassert>
+#include <numeric>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+// Scan [start, last) while multiplying the visited elements together and
+// return the iterator to the first element whose inclusion makes the running
+// product exceed `dim` (or `last` if the product never exceeds `dim`).
+// If the whole range was consumed and the product is still smaller than
+// `dim`, `start` is returned so that [start, result) is an empty range.
+template <class Iterator>
+static auto compute_end_dim(Iterator start, Iterator last, std::size_t dim)
+{
+    // Running product; the predicate below updates it as a side effect
+    std::size_t x = 1;
+    auto it       = std::find_if(start, last, [&](auto i) {
+        x *= i;
+        return x > dim;
+    });
+    // Only reachable when find_if ran to `last`: the range cannot reach `dim`
+    if(x < dim)
+        return start;
+    return it;
+}
+// Product of all values in the range `r`; yields 1 for an empty range.
+template <class Range>
+static auto elements(const Range& r)
+{
+    return std::accumulate(r.begin(), r.end(), std::size_t{1}, std::multiplies<>{});
+}
+// Cursor over one of the two dimension vectors being merged, together with
+// the axes map that is filled in for that vector.
+struct common_dim_state
+{
+    common_dim_state(const std::vector<std::size_t>& pdims,
+                     std::vector<std::vector<std::size_t>>& paxes_map)
+        : dims(&pdims), axes_map(&paxes_map), it(dims->begin())
+    {
+    }
+    // Non-owning pointers to the constructor arguments; the referenced
+    // vectors must outlive this object.
+    const std::vector<std::size_t>* dims            = nullptr;
+    std::vector<std::vector<std::size_t>>* axes_map = nullptr;
+    // Current position within *dims
+    std::vector<std::size_t>::const_iterator it{};
+    // Divisor applied to the current dimension by get(); set by
+    // compute_common_dim().
+    // NOTE(review): presumably the factor of the current dimension that has
+    // already been emitted to the common dims by a previous split - confirm.
+    std::size_t rem = 1;
+    // Current dimension divided by `rem`
+    std::size_t get() const { return *it / rem; }
+    bool is_end() const { return it == dims->end(); }
+    // Advance the cursor by `i` dimensions
+    void next(std::size_t i = 1) { it += i; }
+    // Range of dimensions starting at the cursor selected by compute_end_dim()
+    // for the target size `d`; may be empty.
+    auto dims_for(std::size_t d) const
+    {
+        auto dim_end = compute_end_dim(it, dims->end(), d);
+        return range(it, dim_end);
+    }
+    // Append a single entry holding all axes computed by compute_axes()
+    void add_axes(std::size_t naxes, std::size_t start) MIGRAPHX_TIDY_CONST
+    {
+        auto axes = compute_axes(naxes, start);
+        axes_map->push_back(std::move(axes));
+    }
+    // Append one single-element entry per axis computed by compute_axes()
+    void add_multi_axes(std::size_t naxes, std::size_t start) MIGRAPHX_TIDY_CONST
+    {
+        auto axes = compute_axes(naxes, start);
+        std::transform(axes.begin(),
+                       axes.end(),
+                       std::back_inserter(*axes_map),
+                       [&](auto axis) -> std::vector<std::size_t> { return {axis}; });
+    }
+    // Build `naxes` consecutive axis indices beginning at `start`. When `rem`
+    // is not 1 the list is extended by one axis and begins one index earlier.
+    std::vector<std::size_t> compute_axes(std::size_t naxes, std::size_t start) const
+    {
+        if(rem != 1)
+        {
+            assert(start > 0);
+            naxes++;
+            start--;
+        }
+        std::vector<std::size_t> axes(naxes);
+        std::iota(axes.begin(), axes.end(), start);
+        return axes;
+    }
+};
+// Consume one dimension from `state2` and the run of dimensions from `state1`
+// chosen by dims_for(), appending the resulting common dimensions to `cd_dims`
+// and recording the axes in both states' axes maps.
+// Precondition (asserted): state1.get() <= state2.get().
+// Returns false, leaving `cd_dims` and both states unmodified, when no
+// dimensions of `state1` can be selected or their product does not divide the
+// current dimension of `state2`; returns true otherwise.
+static bool compute_common_dim(std::vector<std::size_t>& cd_dims,
+                               common_dim_state& state1,
+                               common_dim_state& state2)
+{
+    assert(state1.get() <= state2.get());
+    auto d2    = state2.get();
+    auto dims  = state1.dims_for(d2);
+    auto n     = elements(dims);
+    auto naxes = distance(dims);
+    if(naxes == 0)
+        return false;
+    // If not divisible then we can't compute a common dim
+    if((d2 % n) != 0)
+        return false;
+    // Leftover factor of d2 not covered by the selected state1 dimensions
+    auto rem = d2 / n;
+    // Axes must be recorded before the rem fields are updated below, because
+    // compute_axes() reads each state's current `rem`.
+    state1.add_multi_axes(naxes, cd_dims.size());
+    state2.add_axes(rem == 1 ? naxes : naxes + 1, cd_dims.size());
+    state1.rem = rem;
+    state2.rem = 1;
+    cd_dims.insert(cd_dims.end(), dims.begin(), dims.end());
+    // A leftover factor becomes its own common dimension
+    if(state1.rem != 1)
+        cd_dims.push_back(state1.rem);
+    state1.next(distance(dims));
+    state2.next();
+    return true;
+}
+// Compute a set of common dimensions for `dims1` and `dims2` along with the
+// axes maps (axes_map1 / axes_map2) filled in by the per-input states.
+// Both inputs are asserted to have the same, non-zero, number of elements.
+// Returns a default-constructed common_dims (empty `dims`) when
+// compute_common_dim() fails for some step.
+common_dims common_dims::compute(const std::vector<std::size_t>& dims1,
+                                 const std::vector<std::size_t>& dims2)
+{
+    assert(elements(dims1) > 0);
+    assert(elements(dims1) == elements(dims2));
+    common_dims cd;
+    common_dim_state state1{dims1, cd.axes_map1};
+    common_dim_state state2{dims2, cd.axes_map2};
+    while(not state1.is_end() and not state2.is_end())
+    {
+        auto d1 = state1.get();
+        auto d2 = state2.get();
+        // compute_common_dim expects the state with the smaller (or equal)
+        // current dimension as its first state argument
+        if(d1 <= d2)
+        {
+            if(not compute_common_dim(cd.dims, state1, state2))
+                return {};
+        }
+        else // if(d1 > d2)
+        {
+            if(not compute_common_dim(cd.dims, state2, state1))
+                return {};
+        }
+    }
+    assert(elements(dims1) == elements(cd.dims));
+    return cd;
+}
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
--- a/src/driver/CMakeLists.txt
+++ b/src/driver/CMakeLists.txt
@@ -45,6 +45,9 @@ if(NOT WIN32)
 endif()
 rocm_clang_tidy_check(driver)
+file(STRINGS "${CMAKE_SOURCE_DIR}/test/onnx/.onnxrt-commit" String_output)
+target_compile_definitions(driver PUBLIC MIGRAPHX_ORT_SHA1="${String_output}")
 target_link_libraries(driver migraphx_all_targets migraphx_onnx migraphx_tf migraphx_py)
 rocm_install_targets(

--- a/src/driver/main.cpp
+++ b/src/driver/main.cpp
@@ -475,13 +475,15 @@ struct compiler
            {
                if(is_offload_copy_set(p) and not co.offload_copy)
                {
-                    std::cout << "MIGraphX program was likely compiled with offload_copy set, Try "
+                    std::cout
+                        << "[WARNING]: MIGraphX program was likely compiled with offload_copy "
+                           "set, Try "
                           "passing "
                           "`--enable-offload-copy` if program run fails.\n";
                }
                else if(co.offload_copy)
                {
-                    std::cout << "MIGraphX program was likely compiled without "
+                    std::cout << "[WARNING]: MIGraphX program was likely compiled without "
                                 "offload_copy set, Try "
                                 "removing "
                                 "`--enable-offload-copy` flag if passed to driver, if program run "
@@ -802,6 +804,13 @@ int main(int argc, const char* argv[])
    auto&& m = get_commands();
    auto cmd = args.front();
+    if(cmd == "ort-sha")
+    {
+        std::cout << MIGRAPHX_ORT_SHA1 << std::endl;
+        return 0;
+    }
    if(m.count(cmd) > 0)
    {
        m.at(cmd)(argv[0], {args.begin() + 1, args.end()});

--- a/src/driver/verify.cpp
+++ b/src/driver/verify.cpp
@@ -30,6 +30,7 @@
 #include <migraphx/instruction.hpp>
 #include <migraphx/compile_options.hpp>
 #include <migraphx/quantization.hpp>
+#include <migraphx/ranges.hpp>
 namespace migraphx {
 namespace driver {
@@ -83,9 +84,19 @@ void verify_program(const std::string& name,
    std::size_t output_num = x.size();
    for(std::size_t i = 0; i < output_num; ++i)
+    {
+        if(x[i].get_shape().type() != y[i].get_shape().type() or
+           x[i].get_shape().lens() != y[i].get_shape().lens())
+        {
+            std::cout << "FAILED: " << name << std::endl;
+            std::cout << "Shape mismatch {" << x[i].get_shape() << "} != {" << y[i].get_shape()
+                      << "}" << std::endl;
+        }
+        else
        {
            verify_args(name, x[i], y[i], tolerance);
        }
+    }
 }
 void verify_instructions(const program& prog,
@@ -143,11 +154,19 @@ void verify_reduced(program p,
                    double tolerance)
 {
    auto* mm  = p.get_main_module();
-    auto last = std::prev(mm->end(), n + 1);
+    auto last = std::prev(mm->end(), n);
    mm->remove_instructions(last, mm->end());
    std::cout << "Verify: " << n << std::endl;
    std::cout << p << std::endl;
+    try
+    {
        verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
+    }
+    catch(const std::exception& e)
+    {
+        std::cout << "FAILED: " << n << std::endl;
+        std::cout << "Exception: " << e.what() << std::endl;
+    }
 }
 void verify_reduced_program(const program& p,
@@ -160,8 +179,14 @@ void verify_reduced_program(const program& p,
    const auto* mm = p.get_main_module();
    auto n         = std::distance(mm->begin(), mm->end());
    std::cout << "Verify steps: " << n << std::endl;
-    for(std::size_t i = 0; i < n; i++)
+    for(std::size_t i = 1; i < n; i++)
    {
+        auto last = std::prev(mm->end(), i + 1);
+        if(contains({"@literal", "@param"}, last->name()))
+        {
+            std::cout << "Skip: " << i << std::endl;
+            continue;
+        }
        verify_reduced(p, i, t, options, quantize, inputs, tolerance);
    }
 }

--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -24,11 +24,14 @@
 #include <migraphx/fuse_pointwise.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/dead_code_elimination.hpp>
+#include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/instruction.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/make_op.hpp>
 #include <migraphx/iterator_for.hpp>
 #include <migraphx/ranges.hpp>
+#include <migraphx/matcher.hpp>
+#include <migraphx/common_dims.hpp>
 #include <iterator>
 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)
@@ -189,6 +192,54 @@ static bool find_pointwise_modules(module& m)
    }
    return changed;
 }
+namespace {
+// Matcher/rewriter for the pattern pointwise -> reshape-like op -> pointwise.
+// When common dimensions can be computed for the two pointwise shapes, both
+// pointwise instructions are re-created on inputs reshaped to those common
+// dimensions, and the final result is reshaped back to the original output
+// dimensions.
+struct find_pointwise_reshape_pointwise
+{
+    // Matches a "pointwise" instruction that has at least one input which is a
+    // used-once reshape/squeeze/unsqueeze/flatten (bound as "reshape") whose
+    // first argument, after skipping used-once "contiguous" instructions, is a
+    // used-once "pointwise" instruction (bound as "x").
+    auto matcher() const
+    {
+        auto reshape =
+            match::name("reshape", "squeeze", "unsqueeze", "flatten")(match::used_once());
+        auto skip_contiguous = [](auto... ms) {
+            return match::arg(0)(match::skip(match::name("contiguous")(match::used_once()))(ms...));
+        };
+        auto pointwise         = match::name("pointwise")(match::used_once());
+        auto reshape_pointwise = reshape(skip_contiguous(pointwise.bind("x"))).bind("reshape");
+        return match::name("pointwise")(match::any_of[match::inputs()](reshape_pointwise));
+    }
+    void apply(module& m, const match::matcher_result& r) const
+    {
+        auto ins         = r.result;
+        auto x_ins       = r.instructions["x"];
+        auto reshape_ins = r.instructions["reshape"];
+        auto cd = common_dims::compute(ins->get_shape().lens(), x_ins->get_shape().lens());
+        // No common dimensions available: leave the module untouched
+        if(cd.dims.empty())
+            return;
+        // Returns a functor that inserts contiguous + reshape(cd.dims) for a
+        // given input, placing both instructions before `ins_to_insert`
+        auto reshape_input = [&](const auto& ins_to_insert) {
+            return [&](auto input) {
+                auto c = m.insert_instruction(ins_to_insert, make_op("contiguous"), input);
+                return m.insert_instruction(
+                    ins_to_insert, make_op("reshape", {{"dims", cd.dims}}), c);
+            };
+        };
+        // Re-create the first pointwise op on inputs reshaped to the common dims
+        auto x_inputs = x_ins->inputs();
+        std::transform(x_inputs.begin(), x_inputs.end(), x_inputs.begin(), reshape_input(x_ins));
+        auto new_x_ins =
+            m.insert_instruction(x_ins, x_ins->get_operator(), x_inputs, x_ins->module_inputs());
+        // For the second pointwise op, the matched reshape input is replaced by
+        // the new first pointwise op; every other input is reshaped
+        auto inputs = ins->inputs();
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input == reshape_ins)
+                return new_x_ins;
+            return reshape_input(ins)(input);
+        });
+        auto pw = m.insert_instruction(ins, ins->get_operator(), inputs, ins->module_inputs());
+        // Restore the original output dimensions for users of `ins`
+        m.replace_instruction(ins, make_op("reshape", {{"dims", ins->get_shape().lens()}}), pw);
+    }
+};
+} // namespace
 void fuse_pointwise::apply(module_pass_manager& mpm) const
 {
@@ -200,6 +251,8 @@ void fuse_pointwise::apply(module_pass_manager& mpm) const
    }
    for(int i = 0; i < 8; i++)
    {
+        match::find_matches(mpm.get_module(), find_pointwise_reshape_pointwise{});
+        mpm.run_pass(simplify_reshapes{1});
        if(not find_pointwise_modules(mpm.get_module()))
            break;
        mpm.run_pass(dead_code_elimination{});

--- a/src/include/migraphx/check_shapes.hpp
+++ b/src/include/migraphx/check_shapes.hpp
@@ -70,13 +70,19 @@ struct check_shapes
        check_dynamic();
    }
-    template <class Op>
+    template <class Op, MIGRAPHX_REQUIRES(not std::is_convertible<Op, std::string>{})>
    check_shapes(const std::vector<shape>& s, const Op& op, const bool d = false)
        : begin(s.begin()), end(s.end()), name(op.name()), dynamic_allowed(d)
    {
        check_dynamic();
    }
+    /*!
+     * Construct from a vector of shapes and an explicit name string `n`,
+     * instead of an operator object providing `name()`. `d` is stored as
+     * `dynamic_allowed`, and `check_dynamic()` is run immediately.
+     */
+    check_shapes(const std::vector<shape>& s, const std::string& n, const bool d = false)
+        : begin(s.begin()), end(s.end()), name(n), dynamic_allowed(d)
+    {
+        check_dynamic();
+    }
    void check_dynamic() const
    {
        if(not dynamic_allowed and this->any_of([&](const shape& s) { return s.dynamic(); }))
@@ -147,7 +153,7 @@ struct check_shapes
    {
        if(begin != end)
        {
-            if(begin->max_lens().size() != n)
+            if(begin->ndim() != n)
                MIGRAPHX_THROW(prefix() + "Only " + std::to_string(n) + "d supported");
        }
        return *this;
@@ -162,7 +168,7 @@ struct check_shapes
    {
        if(begin != end)
        {
-            if(begin->max_lens().size() > n)
+            if(begin->ndim() > n)
                MIGRAPHX_THROW(prefix() + "Shape must have at most " + std::to_string(n) +
                               " dimensions");
        }
@@ -178,7 +184,7 @@ struct check_shapes
    {
        if(begin != end)
        {
-            if(begin->max_lens().size() < n)
+            if(begin->ndim() < n)
                MIGRAPHX_THROW(prefix() + "Shape must have at least " + std::to_string(n) +
                               " dimensions");
        }
@@ -228,6 +234,16 @@ struct check_shapes
        return *this;
    }
+    /*!
+     * Check all shapes have the same layout.
+     *
+     * The layout of each shape is taken to be the result of find_permutation(s),
+     * and the results are compared through this->same(). If they differ, an
+     * error is raised through MIGRAPHX_THROW with the message
+     * "Layouts do not match" (preceded by prefix()). Otherwise a reference to
+     * this object is returned so that further checks can be chained.
+     */
+    const check_shapes& same_layout() const
+    {
+        if(not this->same([](const shape& s) { return find_permutation(s); }))
+            MIGRAPHX_THROW(prefix() + "Layouts do not match");
+        return *this;
+    }
    /*!
     * Check all shapes are standard.
     */
@@ -238,6 +254,16 @@ struct check_shapes
        return *this;
    }
+    /*!
+     * Check all shapes are scalar.
+     *
+     * Every shape must satisfy shape::scalar(). If any shape does not, an
+     * error is raised through MIGRAPHX_THROW with the message
+     * "Shapes are not a scalar" (preceded by prefix()). Otherwise a reference
+     * to this object is returned so that further checks can be chained.
+     */
+    const check_shapes& scalar() const
+    {
+        if(not this->all_of([](const shape& s) { return s.scalar(); }))
+            MIGRAPHX_THROW(prefix() + "Shapes are not a scalar");
+        return *this;
+    }
    /*!
     * Check all shapes are standard or scalar.
     */

--- a/src/include/migraphx/common_dims.hpp
+++ b/src/include/migraphx/common_dims.hpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef MIGRAPHX_GUARD_MIGRAPHX_COMMON_DIMS_HPP
+#define MIGRAPHX_GUARD_MIGRAPHX_COMMON_DIMS_HPP
+#include <migraphx/config.hpp>
+#include <cstdint>
+#include <vector>
+namespace migraphx {
+inline namespace MIGRAPHX_INLINE_NS {
+/// Computes a higher-dimensional space that preserves the axes of both
+/// sets of dimensions. One axes map is provided per input set of dims
+/// (axes_map1 and axes_map2); each maps an axis of that input to the axes
+/// it corresponds to in the result of common_dims.
+struct MIGRAPHX_EXPORT common_dims
+{
+    static common_dims compute(const std::vector<std::size_t>& dims1,
+                               const std::vector<std::size_t>& dims2);
+    std::vector<std::size_t> dims;
+    std::vector<std::vector<std::size_t>> axes_map1;
+    std::vector<std::vector<std::size_t>> axes_map2;
+};
+} // namespace MIGRAPHX_INLINE_NS
+} // namespace migraphx
+#endif // MIGRAPHX_GUARD_MIGRAPHX_COMMON_DIMS_HPP
--- a/src/include/migraphx/convolution.hpp
+++ b/src/include/migraphx/convolution.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -62,7 +62,7 @@ void convolution(Output output, T input, T weights, Padding padding, Stride stri
        shape win_shape{output_shape.type(), win_size};
        double acc = 0.0;
-        shape_for_each(win_shape, [&](auto idx_win) {
+        shape_for_each(win_shape, [&](const auto& idx_win) {
            auto k           = idx_win[0];
            const auto in_ch = group_id * wei_c + k;
            std::vector<std::ptrdiff_t> idx(idx_o.begin(), idx_o.end());

--- a/src/include/migraphx/matcher.hpp
+++ b/src/include/migraphx/matcher.hpp
@@ -381,22 +381,24 @@ void find_matches_for(source_location location, Mod& mod, instruction_ref ins, M
    const int trace         = value_of(MIGRAPHX_TRACE_MATCHES{});
    const bool validate     = enabled(MIGRAPHX_VALIDATE_MATCHES{});
    const auto trace_filter = string_value_of(MIGRAPHX_TRACE_MATCHES_FOR{});
-    const bool trace_for    = not trace_filter.empty() and
-                           (contains(std::string{location.file_name()}, trace_filter) or
-                            contains(std::string{location.function_name()}, trace_filter));
    bool match              = false;
    each_args(
        [&](auto&& m) {
+            const auto& matcher_name = get_type_name(m);
+            const bool trace_for     = not trace_filter.empty() and
+                                   (contains(std::string{location.file_name()}, trace_filter) or
+                                    contains(std::string{location.function_name()}, trace_filter) or
+                                    contains(matcher_name, trace_filter));
            if(match)
                return;
-            if(trace > 1 or trace_for)
+            if(trace > 1 and trace_for)
-                std::cout << "Match: " << get_type_name(m) << std::endl;
+                std::cout << "Match: " << matcher_name << std::endl;
            auto r = match_instruction(get_module(mod), ins, m.matcher());
            if(r.result == get_module(mod).end())
                return;
            if(trace > 0 or trace_for)
            {
-                std::cout << "Matched by " << get_type_name(m) << std::endl;
+                std::cout << "Matched by " << matcher_name << std::endl;
                get_module(mod).debug_print(ins);
            }
            // If its already invalid dont validate it again
@@ -407,7 +409,7 @@ void find_matches_for(source_location location, Mod& mod, instruction_ref ins, M
                auto invalid = get_module(mod).validate();
                if(invalid != get_module(mod).end())
                {
-                    std::cout << "Invalid program from match: " << get_type_name(m) << std::endl;
+                    std::cout << "Invalid program from match: " << matcher_name << std::endl;
                    std::cout << "Invalid instructions: " << std::endl;
                    get_module(mod).debug_print(invalid->inputs());
                    get_module(mod).debug_print(invalid);
@@ -621,6 +623,8 @@ MIGRAPHX_PRED_MATCHER(broadcast, instruction_ref ins)
 template <class... Ms>
 auto skip(Ms... ms)
 {
+    static_assert(((not std::is_convertible<Ms, std::string>{}) and ...),
+                  "Use a matcher not a string for skip.");
    auto m = any_of(ms...);
    return make_basic_fun_matcher([=](matcher_context& ctx, instruction_ref start) {
        return fix<optional<instruction_ref>>(

--- a/src/include/migraphx/op/allocate.hpp
+++ b/src/include/migraphx/op/allocate.hpp
@@ -36,21 +36,49 @@ namespace op {
 struct allocate
 {
    shape s{};
+    // for dynamic allocate to set the buffer type
+    shape::type_t buf_type = shape::half_type;
    template <class Self, class F>
    static auto reflect(Self& self, F f)
    {
-        return pack(f(self.s, "shape"));
+        return pack(f(self.s, "shape"), f(self.buf_type, "buf_type"));
    }
    std::string name() const { return "allocate"; }
    shape compute_shape(const std::vector<shape>& inputs) const
    {
-        migraphx::check_shapes{inputs, *this, true}.has(0);
+        migraphx::check_shapes{inputs, *this, true}.has(0, 1);
+        // check if shape attribute is not default
+        if(s != shape())
+        {
            return s;
        }
-    argument compute(const shape& output_shape, const std::vector<argument>&) const
+        else
+        {
+            const auto& out_dims = inputs.at(0);
+            assert(not out_dims.dynamic());
+            assert(out_dims.ndim() == 1);
+            std::size_t max_val = std::numeric_limits<std::size_t>::max();
+            std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
+                                                           shape::dynamic_dimension{0, max_val});
+            return {buf_type, dyn_dims};
+        }
+    }
+    argument compute(const shape& output_shape, const std::vector<argument>& args) const
+    {
+        if(args.empty())
        {
            return {output_shape};
        }
+        else
+        {
+            std::vector<std::size_t> output_dims(output_shape.ndim());
+            args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
+            return {shape{buf_type, output_dims}};
+        }
+    }
 };
 } // namespace op

--- a/src/include/migraphx/op/common.hpp
+++ b/src/include/migraphx/op/common.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -33,8 +33,12 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace op {
+// Specifies where to add the "extra" cell of padding if the
+// calculated padding is an odd number.
 // Padding mode is default_ for fixed shape padding.
-// same_lower and same_upper used for dynamic padding.
+// same_lower and same_upper specify dynamic padding.
+// The odd cell goes at the beginning of the dimension
+// (same_lower) or end (same_upper).
 enum padding_mode_t
 {
    default_, // NOLINT

--- a/src/include/migraphx/op/contiguous.hpp
+++ b/src/include/migraphx/op/contiguous.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal

--- a/src/include/migraphx/op/convolution.hpp
+++ b/src/include/migraphx/op/convolution.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -206,6 +206,7 @@ struct convolution
        std::vector<std::size_t> new_padding;
        if(padding_mode != op::padding_mode_t::default_)
        {
+            // Auto-calculate the padding sizes with calc_dyn_auto_pad
            auto input_lens   = args[0].get_shape().lens();
            auto weights_lens = args[1].get_shape().lens();
            new_padding =
@@ -217,6 +218,7 @@ struct convolution
        }
        else
        {
+            // Use the padding that was given
            new_padding = padding;
            if(output_shape.dynamic())
            {