Merge branch 'develop' into simplify-more-reshapes

59386637 · Paul · 6690765c · ed6542ee · 59386637 · 59386637
Commit 59386637 authored Apr 27, 2023 by Paul
20 changed files
--- a/.dockerignore
+++ b/.dockerignore
+# Ignore everything
+**
+
+# Allow files and directories
+!*.txt
+!*.ini
+!/tools/*.sh
+!/doc/*.txt
+!/test/onnx/.onnxrt-commit
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
 name: migraphx

-on: [push, pull_request]
+on: 
+  pull_request:
+  push:
+    branches: 
+      - develop
+      - master
+      - 'release/**'
+

 jobs:
  cancel:
@@ -17,40 +24,29 @@ jobs:
    - name: Free space
      run: |
        sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku 
-        du . --max-depth=1 -h
-        ls -la
-        cd /usr/local
-        du . --max-depth=1 -h
-        ls -la
-        cd /usr/local/lib
-        echo $(pwd)
-        du . --max-depth=1 -h
-        ls -la

    - uses: actions/checkout@v3

    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
    # It also restores the cache if it exists.
-    # name: Docker Layer Caching2
-    - uses: jpribyl/action-docker-layer-caching@v0.1.1
+    - name: Docker layer cache
+      uses: jpribyl/action-docker-layer-caching@v0.1.1
+      with:
+        key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+        restore-keys:
+          docker-layer-caching-migraphx-
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true

-    - name: Prepare timestamp
-      id: cache_timestamp
-      shell: bash 
-      run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
-
-    - name: Cache files for tidy
-      uses: pat-s/always-upload-cache@v3.0.11
+    - name: Restore cache files for tidy
+      uses: actions/cache/restore@v3 
+      id: tidy_restore
      with:
        path: tidy-cache
-        key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
-        restore-keys: |
-            tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
-            tidy-cache-
-
+        key: tidy-cache-${{ github.ref }}
+        restore-keys: tidy-cache-
+        
    - name: Build the Docker image
      run: docker build . --file hip-clang.docker --tag migraphx

@@ -70,6 +66,25 @@ jobs:
          ..
        make -j2 -k onnx-proto tf-proto tidy

+    # GitHub Actions cannot overwrite an existing cache entry, so the entry for
+    # this key is deleted first and then written again by the save step.
+    # Run only on an exact key match: when the restore came from restore-keys,
+    # cache-hit is the string 'false' (truthy in a bare `if:`) and
+    # cache-matched-key names a different cache that must not be deleted.
+    - name: Clear tidy cache before saving
+      if: steps.tidy_restore.outputs.cache-hit == 'true'
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        gh extension install actions/gh-actions-cache --pin v1.0.1
+        gh actions-cache delete ${{ steps.tidy_restore.outputs.cache-matched-key }} --confirm
+      # Best effort: a failed delete must not fail the job.
+      continue-on-error: true
+
+    - name: Save cache files for tidy
+      uses: actions/cache/save@v3 
+      if: always()
+      with:
+        path: tidy-cache
+        key: tidy-cache-${{ github.ref }}
+
+
  cppcheck:
    runs-on: ubuntu-20.04

@@ -81,23 +96,22 @@ jobs:
    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
    # It also restores the cache if it exists.
-    - uses: jpribyl/action-docker-layer-caching@v0.1.1
+    - name: Docker layer cache
+      uses: jpribyl/action-docker-layer-caching@v0.1.1
+      with:
+        key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+        restore-keys:
+          docker-layer-caching-migraphx-
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true

-    - name: Prepare timestamp
-      id: cache_timestamp
-      shell: bash 
-      run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
-
-    - name: Cache files for cppcheck
-      uses: pat-s/always-upload-cache@v2.1.3
+    - name: Restore cache files for cppcheck
+      id: cppcheck_restore
+      uses: actions/cache/restore@v3
      with:
        path: cppcheck-cache
-        key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ steps.cache_timestamp.outputs.timestamp }}
-        restore-keys: |
-            cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ steps.cache_timestamp.outputs.timestamp }}
-            cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-
+        key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
+        restore-keys: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-

    - name: Build the Docker image
      run: docker build . --file hip-clang.docker --tag migraphx
@@ -114,6 +128,25 @@ jobs:
          ..
        make -j2 cppcheck

+    # GitHub Actions cannot overwrite an existing cache entry, so the entry for
+    # this key is deleted first and then written again by the save step.
+    # Run only on an exact key match: when the restore came from restore-keys,
+    # cache-hit is the string 'false' (truthy in a bare `if:`) and
+    # cache-matched-key names a different cache that must not be deleted.
+    - name: Clear cppcheck cache before saving
+      if: steps.cppcheck_restore.outputs.cache-hit == 'true'
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        gh extension install actions/gh-actions-cache --pin v1.0.1
+        gh actions-cache delete ${{ steps.cppcheck_restore.outputs.cache-matched-key }} --confirm
+      # Best effort: a failed delete must not fail the job.
+      continue-on-error: true
+
+    - name: Save cache files for cppcheck
+      uses: actions/cache/save@v3
+      if: always()
+      with:
+        path: cppcheck-cache
+        key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
+
+
  format:
    runs-on: ubuntu-20.04

@@ -125,7 +158,12 @@ jobs:
    # In this step, this action saves a list of existing images,
    # the cache is created without them in the post run.
    # It also restores the cache if it exists.
-    - uses: jpribyl/action-docker-layer-caching@v0.1.1
+    - name: Docker layer cache
+      uses: jpribyl/action-docker-layer-caching@v0.1.1
+      with:
+        key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+        restore-keys:
+          docker-layer-caching-migraphx-
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true

@@ -206,8 +244,13 @@ jobs:
          - codecov

    steps:
-    - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws*  /usr/local/lib/heroku
+    - name: Free space and install rbuild, lld
+      run: |
+        sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws*  /usr/local/lib/heroku
+        sudo apt-get install -y lld
+        python -m pip install --upgrade pip
+        pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
+
    - uses: actions/checkout@v3
    - name: Set up Python
      uses: actions/setup-python@v4
@@ -217,36 +260,25 @@ jobs:
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true
      uses: actions/cache@v3
+      id: deps_cache
      with:
        # This path is specific to Ubuntu
        path: ${{ github.workspace }}/cget
        # Look to see if there is a cache hit for the corresponding requirements file
-        key: 
-          ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
-          ${{ matrix.os }}-cget-4-
-
+        key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
+        restore-keys: ${{ matrix.os }}-cget-4-

    - name: Install dependencies
-      run: |
-        python -m pip install --upgrade pip
-        pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
-        rbuild prepare -d cget -s gh
-        sudo apt-get install -y lld
-    - name: Prepare timestamp
-      id: cache_timestamp
-      shell: bash 
-      run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
+      if: steps.deps_cache.outputs.cache-hit != 'true'
+      run: rbuild prepare -d cget -s gh

-    - name: Cache files for ccache
-      # Ignore the failure of a step and avoid terminating the job.
-      continue-on-error: true
-      uses: pat-s/always-upload-cache@v2.1.3
+    - name: Restore cache files for ccache
+      uses: actions/cache/restore@v3 
+      id: ccache_restore
      with:
-        path: ccache
-        key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-        restore-keys: |
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
+        path: ${{ github.workspace }}/ccache
+        key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
+        restore-keys: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-

    - name: Build and test
      env:
@@ -266,6 +298,23 @@ jobs:
          -DCMAKE_SHARED_LINKER_FLAGS='-fuse-ld=lld'
        ${{ github.workspace }}/cget/bin/ccache -s

+    # GitHub Actions cannot overwrite an existing cache entry, so the entry for
+    # this key is deleted first and then written again by the save step.
+    # Run only on an exact key match: when the restore came from restore-keys,
+    # cache-hit is the string 'false' (truthy in a bare `if:`) and
+    # cache-matched-key names a different cache that must not be deleted.
+    - name: Clear ccache cache before saving
+      if: steps.ccache_restore.outputs.cache-hit == 'true'
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        gh extension install actions/gh-actions-cache --pin v1.0.1
+        gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
+      # Best effort: a failed delete must not fail the job or skip later steps.
+      continue-on-error: true
+
+    - name: Save cache files for ccache
+      uses: actions/cache/save@v3 
+      if: always()
+      with:
+        path: ${{ github.workspace }}/ccache
+        key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
+
    - name: Upload code coverage
      if: "matrix.configuration == 'codecov'"
      env:
@@ -309,6 +358,7 @@ jobs:
      uses: actions/setup-python@v4
      with:
        python-version: 3.7
+
    - name: Cache dependencies
      # Ignore the failure of a step and avoid terminating the job.
      continue-on-error: true
@@ -317,9 +367,8 @@ jobs:
        # This path is specific to Ubuntu
        path: ${{ github.workspace }}/cget
        # Look to see if there is a cache hit for the corresponding requirements file
-        key: 
-          ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
-          ${{ matrix.os }}-cget-4-
+        key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
+        restore-keys: ${{ matrix.os }}-cget-4-


    - name: Install dependencies
@@ -328,22 +377,15 @@ jobs:
        pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
        rbuild prepare -d cget -s gh
        sudo apt-get install -y lld
-    - name: Prepare timestamp
-      id: cache_timestamp
-      shell: bash
-      run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT

-    - name: Cache files for ccache
-      # Ignore the failure of a step and avoid terminating the job.
-      continue-on-error: true
-      uses: pat-s/always-upload-cache@v2.1.3
+    - name: Restore cache files for ccache
+      id: ccache_restore_fpga
+      uses: actions/cache/restore@v3 
      with:
-        path: ccache
-        key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-        restore-keys: |
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
-
+        path: ${{ github.workspace }}/ccache
+        key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
+        restore-keys: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
+    
    - name: Build and test
      env:
        CMAKE_PREFIX_PATH: ${{ github.workspace }}/cget
@@ -363,17 +405,36 @@ jobs:
          -DMIGRAPHX_ENABLE_FPGA=On
        ${{ github.workspace }}/cget/bin/ccache -s

-    #- name: Upload code coverage
-    #  if: "matrix.configuration == 'codecov'"
-    #  env:
-    #    CODECOV_TOKEN: "8545af1c-f90b-4345-92a5-0d075503ca56"
-    #  run: |
-    #    sudo apt-get install -y lcov
-    #    cd build
-    #    lcov --directory . --capture --output-file $(pwd)/coverage.info
-    #    lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
-    #    lcov --list $(pwd)/coverage.info
-    #    curl -Os https://uploader.codecov.io/latest/linux/codecov
-    #    chmod +x codecov
-    #    ./codecov -t ${CODECOV_TOKEN}
-    #    echo "Uploaded"
+    # GitHub Actions cannot overwrite an existing cache entry, so the entry for
+    # this key is deleted first and then written again by the save step.
+    # Run only on an exact key match: when the restore came from restore-keys,
+    # cache-hit is the string 'false' (truthy in a bare `if:`) and
+    # cache-matched-key names a different cache that must not be deleted.
+    - name: Clear ccache cache before saving
+      if: steps.ccache_restore_fpga.outputs.cache-hit == 'true'
+      shell: bash
+      env:
+        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+      run: |
+        gh extension install actions/gh-actions-cache --pin v1.0.1
+        gh actions-cache delete ${{ steps.ccache_restore_fpga.outputs.cache-matched-key }} --confirm
+      # Best effort: a failed delete must not fail the job.
+      continue-on-error: true
+
+    - name: Save cache files for ccache
+      uses: actions/cache/save@v3 
+      if: always()
+      with:
+        path: ${{ github.workspace }}/ccache
+        key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
+
+      #- name: Upload code coverage
+      #  if: "matrix.configuration == 'codecov'"
+      #  env:
+      #    CODECOV_TOKEN: "8545af1c-f90b-4345-92a5-0d075503ca56"
+      #  run: |
+      #    sudo apt-get install -y lcov
+      #    cd build
+      #    lcov --directory . --capture --output-file $(pwd)/coverage.info
+      #    lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
+      #    lcov --list $(pwd)/coverage.info
+      #    curl -Os https://uploader.codecov.io/latest/linux/codecov
+      #    chmod +x codecov
+      #    ./codecov -t ${CODECOV_TOKEN}
+      #    echo "Uploaded"
+
--- a/.github/workflows/clean-closed-pr-caches.yaml
+++ b/.github/workflows/clean-closed-pr-caches.yaml
+# Deletes the GitHub Actions cache entries scoped to a pull request once that
+# pull request is closed.
+name: Cleanup caches of closed PR
+on:
+  pull_request:
+    types:
+      - closed
+
+jobs:
+  cleanup:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Check out code
+        uses: actions/checkout@v3
+        
+      # The script lists the cache keys for the PR's merge ref
+      # (refs/pull/<number>/merge) and deletes them one by one.
+      # `tail -n +3` drops the first two lines of the listing, presumably
+      # header output of the pinned extension version; re-check if the pin changes.
+      # `set +e` keeps the loop going when an individual delete fails.
+      - name: Cleanup
+        run: |
+          gh extension install actions/gh-actions-cache --pin v1.0.1
+          
+          REPO=${{ github.repository }}
+          BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge"
+
+          echo "Fetching list of cache key"
+          cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 | tail -n +3)
+
+          ## Setting this to not fail the workflow while deleting cache keys. 
+          set +e
+          echo "Deleting caches..."
+          for cacheKey in $cacheKeysForPR
+          do
+              gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
+          done
+          echo "Done"
+        # Token exposed to the gh CLI calls in the script above.
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
--- a/.github/workflows/rocm-image-release.yaml
+++ b/.github/workflows/rocm-image-release.yaml
@@ -10,12 +10,38 @@ on:
        description: Repository for benchmark utils
        required: true
        default: 'ROCmSoftwarePlatform/migraphx-benchmark-utils'
+      base_image:
+        description: Base image for rocm Docker build
+        required: true
+        default: "rocm/dev-ubuntu-20.04"
+      docker_image:
+        description: Docker image name for rocm Docker build
+        required: true
+        default: "rocm-migraphx"
+      build_navi:
+        description: Build navi number
+        required: true
+        default: "0"
+      organization:
+        type: string
+        description: Organization based on which location of files will be different
+        required: true
+        default: "AMD"
+      overwrite:
+        type: boolean
+        description: Overwrite image if it already exists
+        required: true

 jobs:
  release:
    uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/rocm-release.yml@main
    with:
-      rocm_release: ${{ github.event.inputs.rocm_release }}
+      rocm_release: ${{ github.event.inputs.rocm_release || '5.1' }}
      benchmark-utils_repo: ${{ github.event.inputs.benchmark-utils_repo || 'ROCmSoftwarePlatform/migraphx-benchmark-utils' }}
+      organization: ${{ github.event.inputs.organization || 'AMD' }}
+      base_image: ${{ github.event.inputs.base_image || 'rocm/dev-ubuntu-20.04' }}
+      docker_image: ${{ github.event.inputs.docker_image || 'rocm-migraphx' }}
+      build_navi: ${{ github.event.inputs.build_navi || '0' }}
+      overwrite: ${{ github.event.inputs.overwrite == 'true' }}
    secrets:
      gh_token: ${{ secrets.MIGRAPHX_BOT_TOKEN }}
--- a/.github/workflows/sync-onnxrt-main.yaml
+++ b/.github/workflows/sync-onnxrt-main.yaml
 name: Onnxruntime main weekly sync
+
 on:
  schedule:
    - cron: '07 17 * * 5'

--- a/Dockerfile
+++ b/Dockerfile
@@ -110,7 +110,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR

 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh

-RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@acb727b348086b58a7f261b32c0e4f0686a4c0ee -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
+RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@55c6ee66cc7502db7950693b3e845676cbf400b1 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off

 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db

--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ build MIGraphX. The specific steps are as follows:
 1) Install rocm-cmake, pip3, rocblas, and miopen-hip with the command

 ```
-sudo apt update && sudo apt install -y rocm-cmake python3-pip rocblas miopen-hip
+sudo apt install -y rocm-cmake python3-pip rocblas miopen-hip
 ```

 2) Install [rbuild](https://github.com/RadeonOpenCompute/rbuild) (sudo may be required here.)
@@ -68,14 +68,11 @@ pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
 3) Build MIGraphX source code

 ```
-rbuild build -d depend -B build --cxx=/opt/rocm/llvm/bin/clang++
+rbuild build -d depend -B build
 ```

 then all the prerequisites are in the folder `depend`, and MIGraphX is built in the `build` directory.

-Note that for ROCm3.7 and later releases, Ubuntu 18.04 or later releases are needed. 
-Upgrade to Ubuntu 18.04 is available at [Upgrade Ubuntu to 18.04](https://github.com/ROCmSoftwarePlatform/AMDMIGraphX/wiki/Upgrade-to-Ubuntu-18.04-for-ROCM3.7-or-later-releases)
-
 Also note that you may encounter the error `rbuild: command not found`. This happens because rbuild is installed
 in `$HOME/.local/bin`, which is not in `PATH`. You can either run `export PATH=$HOME/.local/bin:$PATH`
 to add the folder to `PATH`, or add the option `--prefix /usr/local` to the pip3 command when installing rbuild.
@@ -89,7 +86,7 @@ If using this approach, we need to install the prerequisites, configure the cmak
 For convenience, the prerequisites can be built automatically with rbuild as:

 ```
-rbuild build -d depend --cxx=/opt/rocm/llvm/bin/clang++
+rbuild prepare -d depend
 ```

 then all the prerequisites are in the folder `depend`, and they can be used in the `cmake` configuration
@@ -174,7 +171,6 @@ To install:
 dpkg -i <path_to_deb_file>
 ```

-
 ### Calling MIGraphX APIs
 To use MIGraphX's C/C++ API in your cmake project, we need to set `CMAKE_PREFIX_PATH` to the MIGraphX
 installation location and then do 
@@ -184,8 +180,24 @@ target_link_libraries(myApp migraphx::c)
 ```
 Where `myApp` is the cmake target in your project.

+## Building for development
+
+Using rbuild, the dependencies for development can be installed with:
+
+```
+rbuild develop
+```
+
+This will install the dependencies for development into the `deps` directory and
+configure `cmake` to use those dependencies in the `build` directory. These
+directories can be changed by passing the `--deps-dir` and `--build-dir` flags
+to `rbuild` command:
+
+```
+rbuild develop --build-dir build_rocm_55 --deps-dir /home/user/deps_dir
+```

-### Building the documentation
+## Building the documentation

 HTML and PDF documentation can be built using:


--- a/examples/nlp/python_bert_squad/requirements_bertsquad.txt
+++ b/examples/nlp/python_bert_squad/requirements_bertsquad.txt
@@ -21,6 +21,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 #####################################################################################
-tensorflow==2.9.3
+tensorflow==2.11.1
 onnxruntime
 tokenizers
\ No newline at end of file
--- a/rbuild.ini
+++ b/rbuild.ini
@@ -14,6 +14,7 @@ define =
    CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    MIGRAPHX_ENABLE_CPU=On
+    BUILD_DEV=On

 [develop]
 cxx = ${rocm_path}/llvm/bin/clang++
@@ -25,3 +26,4 @@ define =
    CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    MIGRAPHX_ENABLE_CPU=On
+    BUILD_DEV=On
--- a/src/cpp_generator.cpp
+++ b/src/cpp_generator.cpp
@@ -106,6 +106,11 @@ cpp_generator::function& cpp_generator::function::set_generic_types(const module
    return *this;
 }

+// Marks the parameter `pname` as intentionally unused by prepending a
+// `(void)pname;` statement to this function's body text.
+// Returns *this so calls can be chained.
+cpp_generator::function& cpp_generator::function::unused_param(const std::string& pname)
+{
+    body.insert(0, "(void)" + pname + ";\n");
+    return *this;
+}
 cpp_generator::function& cpp_generator::function::add_generic_param(const std::string& pname)
 {
    params.push_back({pname, "T" + pname});
@@ -238,6 +243,8 @@ std::string cpp_generator::create_function(const cpp_generator::function& f)
    std::string name = f.name.empty() ? "f" + std::to_string(impl->function_count) : f.name;
    impl->fs << join_strings(f.attributes, " ") << " " << f.return_type << " " << name;
    char delim = '(';
+    if(f.params.empty())
+        impl->fs << delim;
    for(auto&& p : f.params)
    {
        impl->fs << delim << p.type << " " << p.name;

--- a/src/driver/main.cpp
+++ b/src/driver/main.cpp
@@ -388,12 +388,12 @@ struct compiler_target
 {
 #ifdef HAVE_GPU
    std::string target_name = "gpu";
-#elif HAVE_CPU
+#elif defined(HAVE_CPU)
    std::string target_name = "cpu";
-#elif HAVE_FPGA
-    std::string target_name = "fpga"
+#elif defined(HAVE_FPGA)
+    std::string target_name = "fpga";
 #else
-    std::string target_name = "ref"
+    std::string target_name = "ref";
 #endif

    void parse(argument_parser& ap)

--- a/src/fuse_pointwise.cpp
+++ b/src/fuse_pointwise.cpp
@@ -31,6 +31,8 @@
 #include <migraphx/ranges.hpp>
 #include <iterator>

+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_DISABLE_POINTWISE_FUSION)
+
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

@@ -74,6 +76,7 @@ static void create_pointwise_modules(module_pass_manager& mpm)
        std::unordered_map<instruction_ref, instruction_ref> param_map;
        std::vector<instruction_ref> pointwise_inputs;
        std::size_t i = 0;
+
        for(auto input : ins->inputs())
        {
            if(contains(param_map, input))
@@ -92,6 +95,10 @@ static void create_pointwise_modules(module_pass_manager& mpm)
            }
        }

+        // Don't create pointwise module if no inputs are detected
+        if(pointwise_inputs.empty())
+            continue;
+
        std::vector<instruction_ref> inputs;
        std::transform(ins->inputs().begin(),
                       ins->inputs().end(),
@@ -188,6 +195,10 @@ void fuse_pointwise::apply(module_pass_manager& mpm) const
 {
    create_pointwise_modules(mpm);
    mpm.run_pass(dead_code_elimination{});
+    if(enabled(MIGRAPHX_DISABLE_POINTWISE_FUSION{}))
+    {
+        return;
+    }
    for(int i = 0; i < 8; i++)
    {
        if(not find_pointwise_modules(mpm.get_module()))

--- a/src/include/migraphx/cpp_generator.hpp
+++ b/src/include/migraphx/cpp_generator.hpp
@@ -78,6 +78,7 @@ struct cpp_generator
        function& set_types(const module& m, const std::function<std::string(shape)>& parse);
        function& set_generic_types(const module& m);
        function& add_generic_param(const std::string& pname);
+        function& unused_param(const std::string& pname);
    };

    cpp_generator();

--- a/src/include/migraphx/matcher.hpp
+++ b/src/include/migraphx/matcher.hpp
@@ -538,6 +538,8 @@ MIGRAPHX_PRED_MATCHER(not_standard_shape, instruction_ref ins)
 {
    return not ins->get_shape().standard();
 }
+MIGRAPHX_PRED_MATCHER(dynamic_shape, instruction_ref ins) { return ins->get_shape().dynamic(); }
+MIGRAPHX_PRED_MATCHER(static_shape, instruction_ref ins) { return not ins->get_shape().dynamic(); }
 MIGRAPHX_PRED_MATCHER(broadcast_shape, instruction_ref ins)
 {
    return ins->get_shape().broadcasted();

--- a/src/include/migraphx/op/pointwise.hpp
+++ b/src/include/migraphx/op/pointwise.hpp
@@ -45,14 +45,15 @@ struct pointwise
        {
            MIGRAPHX_THROW("should have one submodule.");
        }
-        auto* pm    = mods.front();
+        auto* pm = mods.front();
+        if(pm->get_output_shapes().size() != 1)
+            MIGRAPHX_THROW("pointwise should have only one output.");
+        if(inputs.empty())
+            MIGRAPHX_THROW("pointwise should have at least one input");
        auto pnames = pm->get_parameter_names();
        std::sort(pnames.begin(), pnames.end());
        check_shapes{inputs, *this}.has(pnames.size()).same_dims();

-        if(pm->get_output_shapes().size() != 1)
-            MIGRAPHX_THROW("submodule should have only one output.");
-
        auto type = pm->get_output_shapes().front().type();

        // Scalar output if all inputs are scalar

--- a/src/include/migraphx/op/select_module.hpp
+++ b/src/include/migraphx/op/select_module.hpp
@@ -57,6 +57,7 @@ struct select_module
                     param_names.cend(),
                     std::back_inserter(ret),
                     [](auto pn) { return not contains(pn, "#output_"); });
+        std::sort(ret.begin(), ret.end());
        return ret;
    }

@@ -68,6 +69,8 @@ struct select_module
                     param_names.cend(),
                     std::back_inserter(ret),
                     [](auto pn) { return contains(pn, "#output_"); });
+        // needs to be sorted to ensure output parameter ordering
+        std::sort(ret.begin(), ret.end());
        return ret;
    }

@@ -111,6 +114,7 @@ struct select_module

        // One tuple output parameter in main module to multiple output parameters in submodule
        auto out_param_names    = get_output_parameter_names(module_to_run);
+        auto param_shapes       = module_to_run->get_parameter_shapes();
        auto output_sub_objects = args.back().get_sub_objects();
        assert(out_param_names.size() == output_sub_objects.size());
        std::transform(out_param_names.begin(),
@@ -118,10 +122,10 @@ struct select_module
                       output_sub_objects.begin(),
                       std::inserter(p_map, p_map.end()),
                       [&](auto&& name, auto&& a) {
-                           auto ps = module_to_run->get_parameter_shape(name);
+                           auto ps = param_shapes.at(name);
                           if(a.get_shape() != ps)
                           {
-                               assert(ps.bytes() == a.get_shape().bytes());
+                               assert(ps.bytes() <= a.get_shape().bytes());
                               return std::make_pair(name, a.reshape(ps));
                           }
                           else

--- a/src/include/migraphx/shape.hpp
+++ b/src/include/migraphx/shape.hpp
@@ -222,11 +222,15 @@ struct shape
    /// Map element index to space index
    std::size_t index(std::size_t i) const;

-    std::vector<std::size_t> multi(std::size_t i) const;
-    void multi_copy(std::size_t i, std::size_t* start, const std::size_t* end) const;
+    /// Map element index to multi-dimensional index
+    std::vector<std::size_t> multi(std::size_t idx) const;

-    /// Returns true if the shape is packed (number of elements and buffer size the same) with no
-    /// padding
+    /// Map element index to multi-dimensional index and put them into location provided by
+    /// pointers
+    void multi_copy(std::size_t idx, std::size_t* start, const std::size_t* end) const;
+
+    /// Returns true if the shape is packed (number of elements and buffer size the same) with
+    /// no padding
    bool packed() const;

    /// Returns true if the shape has been transposed. That is the strides are not in descending

--- a/src/onnx/onnx_parser.cpp
+++ b/src/onnx/onnx_parser.cpp
@@ -39,6 +39,20 @@
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+// Builds a shape of type `shape_type` from `dyn_dims`.
+// When every dimension reports is_fixed(), the shape is constructed from plain
+// std::size_t lengths (each dimension's `max`); otherwise it is constructed
+// from the dynamic dimensions as given.
+static shape shape_from_dyn_dims(shape::type_t shape_type,
+                                 const std::vector<shape::dynamic_dimension>& dyn_dims)
+{
+    if(std::all_of(dyn_dims.begin(), dyn_dims.end(), [](auto dd) { return dd.is_fixed(); }))
+    {
+        std::vector<std::size_t> dims;
+        std::transform(dyn_dims.cbegin(), dyn_dims.cend(), std::back_inserter(dims), [](auto d) {
+            return d.max;
+        });
+        return {shape_type, dims};
+    }
+    return {shape_type, dyn_dims};
+}
+
 namespace onnx {

 static onnx_parser::attribute_map get_attributes(const onnx::NodeProto& node)
@@ -300,7 +314,7 @@ onnx_parser::parse_graph(module* mod, const onnx::GraphProto& graph, bool inlini
            else if(map_dyn_input_dims.count(name) > 0)
            {
                shape::type_t shape_type = get_type(input.type().tensor_type().elem_type());
-                s                        = {shape_type, map_dyn_input_dims.at(name)};
+                s = shape_from_dyn_dims(shape_type, map_dyn_input_dims.at(name));
            }
            else
            {
@@ -503,16 +517,7 @@ shape onnx_parser::parse_type(const onnx::TypeProto& t,
    {
        return {shape_type};
    }
-    if(std::all_of(dynamic_dims.begin(), dynamic_dims.end(), [](auto dd) { return dd.is_fixed(); }))
-    {
-        std::vector<std::size_t> dims;
-        std::transform(dynamic_dims.begin(),
-                       dynamic_dims.end(),
-                       std::back_inserter(dims),
-                       [](auto d) { return d.max; });
-        return {shape_type, dims};
-    }
-    return {shape_type, dynamic_dims};
+    return shape_from_dyn_dims(shape_type, dynamic_dims);
 }

 shape::type_t get_type(int dtype)

--- a/src/pass_manager.cpp
+++ b/src/pass_manager.cpp
@@ -103,6 +103,7 @@ struct module_pm : module_pass_manager

    virtual void run_pass(const pass& p) override
    {
+        trace("Pass: ", p.name());
        assert(mod);
        assert(mod->validate() == mod->end());
        if(enabled(MIGRAPHX_TIME_PASSES{}))

--- a/src/propagate_constant.cpp
+++ b/src/propagate_constant.cpp
@@ -27,11 +27,14 @@
 #include <migraphx/literal.hpp>
 #include <migraphx/functional.hpp>
 #include <migraphx/par_for.hpp>
+#include <migraphx/env.hpp>
 #include <unordered_set>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_PROPAGATE_CONSTANT)
+
 bool skip_propogate(instruction_ref ins)
 {
    if(ins->name() == "contiguous")
@@ -85,6 +88,19 @@ void propagate_constant::apply(module& m) const
    {
        if(not literals[i].empty())
        {
+            if(enabled(MIGRAPHX_TRACE_PROPAGATE_CONSTANT{}))
+            {
+                std::cout << "Constant replace: " << std::endl;
+                std::vector<instruction_ref> inss;
+                fix([&](auto self, auto ins) {
+                    if(contains(inss, ins))
+                        return;
+                    for(auto input : ins->inputs())
+                        self(input);
+                    inss.push_back(ins);
+                })(const_instrs_vec[i]);
+                m.debug_print(inss);
+            }
            assert(literals[i].get_shape() == const_instrs_vec[i]->get_shape());
            auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
            m.replace_instruction(const_instrs_vec[i], l);