Unverified Commit 18cf0435 authored by Umang Yadav, committed by GitHub

Merge branch 'develop' into blas_tuning

parents 12258d8f 3e8d7196
# Ignore everything
**
# Allow files and directories
!*.txt
!*.ini
!/tools/*.sh
!/doc/*.txt
!/test/onnx/.onnxrt-commit
name: migraphx
-on: [push, pull_request]
+on:
+  pull_request:
+  push:
+    branches:
+      - develop
+      - master
+      - 'release/**'
jobs:
  cancel:
@@ -17,40 +24,29 @@ jobs:
      - name: Free space
        run: |
          sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
-          du . --max-depth=1 -h
-          ls -la
-          cd /usr/local
-          du . --max-depth=1 -h
-          ls -la
-          cd /usr/local/lib
-          echo $(pwd)
-          du . --max-depth=1 -h
-          ls -la
      - uses: actions/checkout@v3
      # In this step, this action saves a list of existing images,
      # the cache is created without them in the post run.
      # It also restores the cache if it exists.
-      # name: Docker Layer Caching2
-      - uses: jpribyl/action-docker-layer-caching@v0.1.1
+      - name: Docker layer cache
+        uses: jpribyl/action-docker-layer-caching@v0.1.1
+        with:
+          key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+          restore-keys:
+            docker-layer-caching-migraphx-
        # Ignore the failure of a step and avoid terminating the job.
        continue-on-error: true
-      - name: Prepare timestamp
-        id: cache_timestamp
-        shell: bash
-        run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
-      - name: Cache files for tidy
-        uses: pat-s/always-upload-cache@v3.0.11
+      - name: Restore cache files for tidy
+        uses: actions/cache/restore@v3
+        id: tidy_restore
        with:
          path: tidy-cache
-          key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
-          restore-keys: |
-            tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
-            tidy-cache-
+          key: tidy-cache-${{ github.ref }}
+          restore-keys: tidy-cache-
      - name: Build the Docker image
        run: docker build . --file hip-clang.docker --tag migraphx
@@ -70,6 +66,25 @@ jobs:
            ..
          make -j2 -k onnx-proto tf-proto tidy
+      # GH actions can not update existing cache, as a workaround clear cache and then save it
+      - name: Clear tidy cache before saving
+        if: ${{ steps.tidy_restore.outputs.cache-hit }}
+        shell: bash
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh extension install actions/gh-actions-cache --pin v1.0.1
+          gh actions-cache delete ${{ steps.tidy_restore.outputs.cache-matched-key }} --confirm
+        continue-on-error: true
+      - name: Save cache files for tidy
+        uses: actions/cache/save@v3
+        if: always()
+        with:
+          path: tidy-cache
+          key: tidy-cache-${{ github.ref }}
  cppcheck:
    runs-on: ubuntu-20.04
@@ -81,23 +96,22 @@ jobs:
      # In this step, this action saves a list of existing images,
      # the cache is created without them in the post run.
      # It also restores the cache if it exists.
-      - uses: jpribyl/action-docker-layer-caching@v0.1.1
+      - name: Docker layer cache
+        uses: jpribyl/action-docker-layer-caching@v0.1.1
+        with:
+          key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+          restore-keys:
+            docker-layer-caching-migraphx-
        # Ignore the failure of a step and avoid terminating the job.
        continue-on-error: true
-      - name: Prepare timestamp
-        id: cache_timestamp
-        shell: bash
-        run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
-      - name: Cache files for cppcheck
-        uses: pat-s/always-upload-cache@v2.1.3
+      - name: Restore cache files for cppcheck
+        id: cppcheck_restore
+        uses: actions/cache/restore@v3
        with:
          path: cppcheck-cache
-          key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ steps.cache_timestamp.outputs.timestamp }}
-          restore-keys: |
-            cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ steps.cache_timestamp.outputs.timestamp }}
-            cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-
+          key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
+          restore-keys: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-
      - name: Build the Docker image
        run: docker build . --file hip-clang.docker --tag migraphx
@@ -114,6 +128,25 @@ jobs:
            ..
          make -j2 cppcheck
+      # GH actions can not update existing cache, as a workaround clear cache and then save it
+      - name: Clear cppcheck cache before saving
+        if: ${{ steps.cppcheck_restore.outputs.cache-hit }}
+        shell: bash
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh extension install actions/gh-actions-cache --pin v1.0.1
+          gh actions-cache delete ${{ steps.cppcheck_restore.outputs.cache-matched-key }} --confirm
+        continue-on-error: true
+      - name: Save cache files for cppcheck
+        uses: actions/cache/save@v3
+        if: always()
+        with:
+          path: cppcheck-cache
+          key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
  format:
    runs-on: ubuntu-20.04
@@ -125,7 +158,12 @@ jobs:
      # In this step, this action saves a list of existing images,
      # the cache is created without them in the post run.
      # It also restores the cache if it exists.
-      - uses: jpribyl/action-docker-layer-caching@v0.1.1
+      - name: Docker layer cache
+        uses: jpribyl/action-docker-layer-caching@v0.1.1
+        with:
+          key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
+          restore-keys:
+            docker-layer-caching-migraphx-
        # Ignore the failure of a step and avoid terminating the job.
        continue-on-error: true
@@ -206,8 +244,13 @@ jobs:
      - codecov
    steps:
-      - name: Free space
-        run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+      - name: Free space and install rbuild, lld
+        run: |
+          sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+          sudo apt-get install -y lld
+          python -m pip install --upgrade pip
+          pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
@@ -217,36 +260,25 @@ jobs:
        # Ignore the failure of a step and avoid terminating the job.
        continue-on-error: true
        uses: actions/cache@v3
+        id: deps_cache
        with:
          # This path is specific to Ubuntu
          path: ${{ github.workspace }}/cget
          # Look to see if there is a cache hit for the corresponding requirements file
-          key:
-            ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
-            ${{ matrix.os }}-cget-4-
+          key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
+          restore-keys: ${{ matrix.os }}-cget-4-
      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
-          rbuild prepare -d cget -s gh
-          sudo apt-get install -y lld
+        if: steps.deps_cache.outputs.cache-hit != 'true'
+        run: rbuild prepare -d cget -s gh
-      - name: Prepare timestamp
-        id: cache_timestamp
-        shell: bash
-        run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
-      - name: Cache files for ccache
-        # Ignore the failure of a step and avoid terminating the job.
-        continue-on-error: true
-        uses: pat-s/always-upload-cache@v2.1.3
+      - name: Restore cache files for ccache
+        uses: actions/cache/restore@v3
+        id: ccache_restore
        with:
-          path: ccache
-          key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-          restore-keys: |
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
+          path: ${{ github.workspace }}/ccache
+          key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
+          restore-keys: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
      - name: Build and test
        env:
@@ -266,6 +298,23 @@ jobs:
            -DCMAKE_SHARED_LINKER_FLAGS='-fuse-ld=lld'
          ${{ github.workspace }}/cget/bin/ccache -s
+      # GH actions can not update existing cache, as a workaround clear cache and then save it
+      - name: Clear ccache cache before saving
+        if: ${{ steps.ccache_restore.outputs.cache-hit }}
+        shell: bash
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh extension install actions/gh-actions-cache --pin v1.0.1
+          gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
+      - name: Save cache files for ccache
+        uses: actions/cache/save@v3
+        if: always()
+        with:
+          path: ${{ github.workspace }}/ccache
+          key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
      - name: Upload code coverage
        if: "matrix.configuration == 'codecov'"
        env:
@@ -309,6 +358,7 @@ jobs:
        uses: actions/setup-python@v4
        with:
          python-version: 3.7
      - name: Cache dependencies
        # Ignore the failure of a step and avoid terminating the job.
        continue-on-error: true
@@ -317,9 +367,8 @@ jobs:
          # This path is specific to Ubuntu
          path: ${{ github.workspace }}/cget
          # Look to see if there is a cache hit for the corresponding requirements file
-          key:
-            ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
-            ${{ matrix.os }}-cget-4-
+          key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
+          restore-keys: ${{ matrix.os }}-cget-4-
      - name: Install dependencies
@@ -328,22 +377,15 @@ jobs:
          pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
          rbuild prepare -d cget -s gh
          sudo apt-get install -y lld
-      - name: Prepare timestamp
-        id: cache_timestamp
-        shell: bash
-        run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
-      - name: Cache files for ccache
-        # Ignore the failure of a step and avoid terminating the job.
-        continue-on-error: true
-        uses: pat-s/always-upload-cache@v2.1.3
+      - name: Restore cache files for ccache
+        id: ccache_restore_fpga
+        uses: actions/cache/restore@v3
        with:
-          path: ccache
-          key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-          restore-keys: |
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
-            ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
+          path: ${{ github.workspace }}/ccache
+          key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
+          restore-keys: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
      - name: Build and test
        env:
          CMAKE_PREFIX_PATH: ${{ github.workspace }}/cget
@@ -363,17 +405,36 @@ jobs:
            -DMIGRAPHX_ENABLE_FPGA=On
          ${{ github.workspace }}/cget/bin/ccache -s
+      # this is a workaround, with GH actions can not update existing cache
+      - name: Clear ccache cache before saving
+        if: ${{ steps.ccache_restore_fpga.outputs.cache-hit }}
+        shell: bash
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          gh extension install actions/gh-actions-cache
+          gh actions-cache delete ${{ steps.ccache_restore_fpga.outputs.cache-matched-key }} --confirm
+        continue-on-error: true
+      - name: Save cache files for ccache
+        uses: actions/cache/save@v3
+        if: always()
+        with:
+          path: ${{ github.workspace }}/ccache
+          key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
      #- name: Upload code coverage
      #  if: "matrix.configuration == 'codecov'"
      #  env:
      #    CODECOV_TOKEN: "8545af1c-f90b-4345-92a5-0d075503ca56"
      #  run: |
      #    sudo apt-get install -y lcov
      #    cd build
      #    lcov --directory . --capture --output-file $(pwd)/coverage.info
      #    lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
      #    lcov --list $(pwd)/coverage.info
      #    curl -Os https://uploader.codecov.io/latest/linux/codecov
      #    chmod +x codecov
      #    ./codecov -t ${CODECOV_TOKEN}
      #    echo "Uploaded"
name: Cleanup caches of closed PR
on:
pull_request:
types:
- closed
jobs:
cleanup:
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v3
- name: Cleanup
run: |
gh extension install actions/gh-actions-cache --pin v1.0.1
REPO=${{ github.repository }}
BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge"
echo "Fetching list of cache key"
cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 | tail -n +3)
## Setting this to not fail the workflow while deleting cache keys.
set +e
echo "Deleting caches..."
for cacheKey in $cacheKeysForPR
do
gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
done
echo "Done"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
@@ -10,12 +10,38 @@ on:
        description: Repository for benchmark utils
        required: true
        default: 'ROCmSoftwarePlatform/migraphx-benchmark-utils'
+      base_image:
+        description: Base image for rocm Docker build
+        required: true
+        default: "rocm/dev-ubuntu-20.04"
+      docker_image:
+        description: Docker image name for rocm Docker build
+        required: true
+        default: "rocm-migraphx"
+      build_navi:
+        description: Build navi number
+        required: true
+        default: "0"
+      organization:
+        type: string
+        description: Organization based on which location of files will be different
+        required: true
+        default: "AMD"
+      overwrite:
+        type: boolean
+        description: Overwrite image if it already exists
+        required: true
jobs:
  release:
    uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/rocm-release.yml@main
    with:
-      rocm_release: ${{ github.event.inputs.rocm_release }}
+      rocm_release: ${{ github.event.inputs.rocm_release || '5.1' }}
      benchmark-utils_repo: ${{ github.event.inputs.benchmark-utils_repo || 'ROCmSoftwarePlatform/migraphx-benchmark-utils' }}
+      organization: ${{ github.event.inputs.organization || 'AMD' }}
+      base_image: ${{ github.event.inputs.base_image || 'rocm/dev-ubuntu-20.04' }}
+      docker_image: ${{ github.event.inputs.docker_image || 'rocm-migraphx' }}
+      build_navi: ${{ github.event.inputs.build_navi || '0' }}
+      overwrite: ${{ github.event.inputs.overwrite == 'true' }}
    secrets:
      gh_token: ${{ secrets.MIGRAPHX_BOT_TOKEN }}

name: Onnxruntime main weekly sync
on:
  schedule:
    - cron: '07 17 * * 5'
...
@@ -110,7 +110,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
-RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@acb727b348086b58a7f261b32c0e4f0686a4c0ee -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
+RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@55c6ee66cc7502db7950693b3e845676cbf400b1 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
...
@@ -56,7 +56,7 @@ build MIGraphX. The specific steps are as follows:
1) Install rocm-cmake, pip3, rocblas, and miopen-hip with the command
```
-sudo apt update && sudo apt install -y rocm-cmake python3-pip rocblas miopen-hip
+sudo apt install -y rocm-cmake python3-pip rocblas miopen-hip
```
2) Install [rbuild](https://github.com/RadeonOpenCompute/rbuild) (sudo may be required here.)
@@ -68,14 +68,11 @@ pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
3) Build MIGraphX source code
```
-rbuild build -d depend -B build --cxx=/opt/rocm/llvm/bin/clang++
+rbuild build -d depend -B build
```
then all the prerequisites are in the folder `depend`, and MIGraphX is built in the `build` directory.
-Note that for ROCm3.7 and later releases, Ubuntu 18.04 or later releases are needed.
-Upgrade to Ubuntu 18.04 is available at [Upgrade Ubuntu to 18.04](https://github.com/ROCmSoftwarePlatform/AMDMIGraphX/wiki/Upgrade-to-Ubuntu-18.04-for-ROCM3.7-or-later-releases)
Also note that you may meet the error of `rbuild: command not found`. It is because rbuild is installed
at `$HOME/.local/bin`, which is not in `PATH`. You can either export PATH as `export PATH=$HOME/.local/bin:$PATH`
to add the folder to `PATH` or add the option `--prefix /usr/local` in the pip3 command when installing rbuild.
@@ -89,7 +86,7 @@ If using this approach, we need to install the prerequisites, configure the cmak
For convenience, the prerequisites can be built automatically with rbuild as:
```
-rbuild build -d depend --cxx=/opt/rocm/llvm/bin/clang++
+rbuild prepare -d depend
```
then all the prerequisites are in the folder `depend`, and they can be used in the `cmake` configuration
@@ -174,7 +171,6 @@ To install:
dpkg -i <path_to_deb_file>
```

### Calling MIGraphX APIs
To use MIGraphX's C/C++ API in your cmake project, we need to set `CMAKE_PREFIX_PATH` to the MIGraphX
installation location and then do
@@ -184,8 +180,24 @@ target_link_libraries(myApp migraphx::c)
```
Where `myApp` is the cmake target in your project.

+## Building for development
+
+Using rbuild, the dependencies for development can be installed with:
+```
+rbuild develop
+```
+This will install the dependencies for development into the `deps` directory and
+configure `cmake` to use those dependencies in the `build` directory. These
+directories can be changed by passing the `--deps-dir` and `--build-dir` flags
+to `rbuild` command:
+```
+rbuild develop --build-dir build_rocm_55 --deps-dir /home/user/deps_dir
+```
+
-### Building the documentation
+## Building the documentation

HTML and PDF documentation can be built using:
...
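As a complement to the cmake snippet the README references, here is a minimal hedged sketch of a consumer built against the `migraphx::c` target. It uses MIGraphX's C++ wrapper API; the header path and call names are assumed from the library's public headers, not from this diff, and the model file name is illustrative.

#include <migraphx/migraphx.hpp> // assumed: C++ wrapper over the MIGraphX C API

int main()
{
    // Parse an ONNX model, compile it for the reference target, and evaluate it
    migraphx::program p = migraphx::parse_onnx("model.onnx");
    p.compile(migraphx::target("ref"));
    migraphx::program_parameters params; // empty: generated/default inputs omitted here
    auto outputs = p.eval(params);
    return 0;
}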
@@ -32,6 +32,10 @@ Disable fast math optimization
Perform an exhaustive search to find the fastest version of generated kernels for selected backend
+.. options:: --split-single-dyn-dim
+
+Enable the split single dynamic dimension pass
+
.. option:: --fp16
Quantize for fp16
...
@@ -24,7 +24,7 @@ Load as MIGraphX JSON
.. option:: --batch [unsigned int] (Default: 1)
-Set batch size for model
+For a static model, set batch size. For a dynamic batch model, sets the batch size at runtime.
.. option:: --nhwc
@@ -46,6 +46,14 @@ Trim instructions from the end (Default: 0)
Dim of a parameter (format: "@name d1 d2 dn")
+.. options:: --dyn-input-dim [std::vector<std::string>]
+
+Set dynamic dimensions of a parameter using JSON formatting (format "@name" "dynamic_dimension_json")
+
+.. options:: --default-dyn-dim
+
+Set the default dynamic dimension (format {min:x, max:y, optimals:[o1,o2,...]})
+
.. option:: --optimize, -O
Optimize when reading
...
@@ -29,7 +29,7 @@ See below for a comprehensive list of commands and option arguments, as well as
| --tf | Load file as a tensorflow graph |
| --migraphx | Load file as a migraphx graph |
| --migraphx-json | Load file as a migraphx JSON graph |
-| --batch | Set batch size for the model |
+| --batch | For a static model, set batch size. For a dynamic batch model, sets the batch size at runtime.|
| --nhwc | Treat tensorflow format as nhwc |
| --nchw | Treat tensorflow format as nchw |
| --skip-unknown-operators | Skip unknown operators when parsing and continue to parse |
@@ -44,12 +44,16 @@ See below for a comprehensive list of commands and option arguments, as well as
| --output \| -o | Output to file |
| --fill0 | Fill parameter with 0s |
| --fill1 | Fill parameter with 1s |
+| --input-dim | Set static dimensions of a parameter |
+| --dyn-input-dim | Set dynamic dimensions of a parameter |
+| --default-dyn-dim | Set default dynamic dimension |
| --gpu | Compile on the gpu |
| --cpu | Compile on the cpu |
| --ref | Compile on the reference implementation |
| --enable-offload-copy | Enable implicit offload copying |
| --disable-fast-math | Disable fast math optimization |
| --exhaustive-tune | Enable exhaustive search to find fastest kernel |
+| --split-single-dyn-dim | Enable split_single_dyn_dim compiler pass |
| --fp16 | Quantize for fp16 |
| --int8 | Quantize for int8 |
| --tolerance | Tolerance for errors |
...
@@ -14,6 +14,7 @@ define =
    CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    MIGRAPHX_ENABLE_CPU=On
+    BUILD_DEV=On

[develop]
cxx = ${rocm_path}/llvm/bin/clang++
@@ -25,3 +26,4 @@ define =
    CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
    MIGRAPHX_ENABLE_CPU=On
+    BUILD_DEV=On
@@ -50,6 +50,7 @@ add_library(migraphx
    env.cpp
    file_buffer.cpp
    fuse_pointwise.cpp
+    fuse_reduce.cpp
    generate.cpp
    inline_module.cpp
    insert_pad.cpp
...
@@ -148,10 +148,8 @@ shape common_shape(const std::vector<shape>& shapes)
    return {compute_common_types(shapes), compute_common_lens(shapes)};
}

-instruction_ref insert_common_op(module& m,
-                                 instruction_ref ins,
-                                 const operation& op,
-                                 std::vector<instruction_ref> inputs)
+std::vector<instruction_ref>
+insert_common_args(module& m, instruction_ref ins, std::vector<instruction_ref> inputs)
{
    if(std::any_of(
           inputs.cbegin(), inputs.cend(), [](auto input) { return input->get_shape().dynamic(); }))
@@ -210,7 +208,20 @@ instruction_ref insert_common_op(module& m,
            return input;
        });
    }
-    return m.insert_instruction(ins, op, inputs);
+    return inputs;
+}
+
+std::vector<instruction_ref> add_common_args(module& m, std::vector<instruction_ref> inputs)
+{
+    return insert_common_args(m, m.end(), std::move(inputs));
+}
+
+instruction_ref insert_common_op(module& m,
+                                 instruction_ref ins,
+                                 const operation& op,
+                                 std::vector<instruction_ref> inputs)
+{
+    return m.insert_instruction(ins, op, insert_common_args(m, ins, std::move(inputs)));
}

instruction_ref add_common_op(module& m, const operation& op, std::vector<instruction_ref> inputs)
...
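A minimal sketch of how the refactored helpers compose (the include paths and the `add_broadcast_mul` wrapper are illustrative, not part of the commit): `insert_common_args` broadcasts and converts the inputs to a common shape and type, and `insert_common_op` then builds the operation on the normalized inputs, so callers can reuse the broadcast logic without creating an operation.

// Sketch only: equivalent to add_common_op(m, make_op("mul"), {x, y})
#include <migraphx/common.hpp>  // assumed header for the common-args helpers
#include <migraphx/make_op.hpp>
#include <migraphx/module.hpp>

migraphx::instruction_ref add_broadcast_mul(migraphx::module& m,
                                            migraphx::instruction_ref x,
                                            migraphx::instruction_ref y)
{
    // Normalize {x, y} to a common shape/type, then create the op on the result
    auto args = migraphx::add_common_args(m, {x, y});
    return m.add_instruction(migraphx::make_op("mul"), args);
}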
@@ -106,6 +106,13 @@ cpp_generator::function& cpp_generator::function::set_generic_types(const module
    return *this;
}

+cpp_generator::function& cpp_generator::function::add_generic_param(const std::string& pname)
+{
+    params.push_back({pname, "T" + pname});
+    tparams.push_back("class T" + pname);
+    return *this;
+}
+
struct cpp_generator_impl
{
    std::stringstream fs{};
@@ -182,7 +189,8 @@ std::string cpp_generator::generate_point_op(const operation& op,

std::string cpp_generator::str() const { return impl->fs.str(); }

-cpp_generator::function cpp_generator::generate_module(const module& m)
+cpp_generator::function cpp_generator::generate_module(const module& m,
+                                                       const generate_module_callback& g)
{
    function f;
    auto name = transform_string(m.name(), [](char c) {
@@ -195,13 +203,7 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
        if(ins->name() == "@literal")
            return shape::cpp_type(ins->get_shape().type()) + "(" +
                   ins->get_literal().to_string() + ")";
-        std::vector<std::string> args;
-        std::transform(ins->inputs().begin(),
-                       ins->inputs().end(),
-                       std::back_inserter(args),
-                       [&](auto i) { return names.at(i); });
-        auto s = this->generate_point_op(ins->get_operator(), args);
+        auto s = g(ins, names);
        if(impl->fresult)
            return impl->fresult(ins->get_shape()) + '(' + s + ')';
        else
@@ -210,6 +212,24 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
    return f;
}

+std::vector<std::string>
+cpp_generator::to_args(const std::vector<instruction_ref>& inputs,
+                       const std::unordered_map<instruction_ref, std::string>& names)
+{
+    std::vector<std::string> args;
+    std::transform(inputs.begin(), inputs.end(), std::back_inserter(args), [&](auto i) {
+        return names.at(i);
+    });
+    return args;
+}
+
+cpp_generator::function cpp_generator::generate_module(const module& m)
+{
+    return this->generate_module(m, [&](auto ins, const auto& names) {
+        return this->generate_point_op(ins->get_operator(), to_args(ins->inputs(), names));
+    });
+}
+
std::string cpp_generator::create_function(const cpp_generator::function& f)
{
    impl->function_count++;
@@ -218,6 +238,8 @@ std::string cpp_generator::create_function(const cpp_generator::function& f)
    std::string name = f.name.empty() ? "f" + std::to_string(impl->function_count) : f.name;
    impl->fs << join_strings(f.attributes, " ") << " " << f.return_type << " " << name;
    char delim = '(';
+    if(f.params.empty())
+        impl->fs << delim;
    for(auto&& p : f.params)
    {
        impl->fs << delim << p.type << " " << p.name;
...
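A small hedged sketch of how a caller might use the new callback overload (the wrapper function and the cast are illustrative; only `generate_module`, `to_args`, and `generate_point_op` come from the diff above): the callback receives each instruction plus the name map and returns the C++ expression to emit, so custom code generation can reuse the module-walking logic.

// Sketch only, assuming <migraphx/cpp_generator.hpp> declares these members:
// route every point op through the default path, wrapping the generated
// expression in a cast purely for demonstration.
migraphx::cpp_generator::function generate_casted(migraphx::cpp_generator& gen,
                                                  const migraphx::module& m)
{
    return gen.generate_module(m, [&](auto ins, const auto& names) {
        auto args = migraphx::cpp_generator::to_args(ins->inputs(), names);
        return "static_cast<float>(" + gen.generate_point_op(ins->get_operator(), args) + ")";
    });
}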
@@ -148,13 +148,21 @@ struct value_parser
    template <MIGRAPHX_REQUIRES(not std::is_enum<T>{} and not is_multi_value<T>{})>
    static T apply(const std::string& x)
    {
-        T result;
-        std::stringstream ss;
-        ss.str(x);
-        ss >> result;
-        if(ss.fail())
-            throw std::runtime_error("Failed to parse '" + x + "' as " + type_name<T>::apply());
-        return result;
+        // handle whitespace in string
+        if constexpr(std::is_same<T, std::string>{})
+        {
+            return x;
+        }
+        else
+        {
+            T result;
+            std::stringstream ss;
+            ss.str(x);
+            ss >> result;
+            if(ss.fail())
+                throw std::runtime_error("Failed to parse '" + x + "' as " + type_name<T>::apply());
+            return result;
+        }
    }

    template <MIGRAPHX_REQUIRES(std::is_enum<T>{} and not is_multi_value<T>{})>
...
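The motivation for the `if constexpr` branch, shown as a hedged sketch (the values are illustrative): `operator>>` on a `std::stringstream` stops at the first whitespace character, which would truncate JSON-like option values such as the new `--dyn-input-dim` strings, so string values are now returned verbatim.

// Sketch only, assuming the driver's value_parser template is in scope:
// old path: ss >> result would yield "{min:1," (truncated at the space)
auto s = value_parser<std::string>::apply("{min:1, max:64}"); // "{min:1, max:64}", verbatim
auto n = value_parser<int>::apply("42");                      // still parsed via stringstream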
@@ -33,6 +33,7 @@
#include <migraphx/tf.hpp>
#include <migraphx/onnx.hpp>
#include <migraphx/stringutils.hpp>
+#include <migraphx/convert_to_json.hpp>
#include <migraphx/load_save.hpp>
#include <migraphx/json.hpp>
#include <migraphx/version.h>
@@ -68,7 +69,9 @@ struct loader
    bool brief = false;
    std::string output_type;
    std::string output;
+    std::string default_dyn_dim;
    std::vector<std::string> param_dims;
+    std::vector<std::string> dyn_param_dims;
    std::vector<std::string> output_names;

    void parse(argument_parser& ap)
@@ -83,7 +86,11 @@ struct loader
        ap(file_type, {"--tf"}, ap.help("Load as tensorflow"), ap.set_value("tf"));
        ap(file_type, {"--migraphx"}, ap.help("Load as MIGraphX"), ap.set_value("migraphx"));
        ap(file_type, {"--migraphx-json"}, ap.help("Load as MIGraphX JSON"), ap.set_value("json"));
-        ap(batch, {"--batch"}, ap.help("Set batch size for model"));
+        ap(batch,
+           {"--batch"},
+           ap.help("For a static model, sets default_dim_value size (commonly batch size). For a "
+                   "dynamic batch model, sets the batch "
+                   "size at runtime."));
        ap(is_nhwc, {"--nhwc"}, ap.help("Treat tensorflow format as nhwc"), ap.set_value(true));
        ap(skip_unknown_operators,
           {"--skip-unknown-operators"},
@@ -96,7 +103,16 @@ struct loader
           ap.help("Dim of a parameter (format: \"@name d1 d2 dn\")"),
           ap.append(),
           ap.nargs(2));
+        ap(dyn_param_dims,
+           {"--dyn-input-dim"},
+           ap.help("Dynamic dimensions of a parameter (format: \"@name_1\" \"[{min:x, max:y, "
+                   "optimals:[o1,o2,...]}, dim2,dim3, ...]\", \"@name_2\", ... You can supply a "
+                   "single integer value for a dimension to specify it as fixed."),
+           ap.append(),
+           ap.nargs(2));
+        ap(default_dyn_dim,
+           {"--default-dyn-dim"},
+           ap.help("Default dynamic dimension (format: \"{min:x, max:y, optimals:[o1,o2]}\")."));
        ap(output_names,
           {"--output-names"},
           ap.help("Names of node output (format: \"name_1 name_2 name_n\")"),
@@ -147,6 +163,40 @@ struct loader
        return map_input_dims;
    }

+    static auto parse_dyn_dims_json(const std::string& dd_json)
+    {
+        // expecting a json string like "[{min:1,max:64,optimals:[1,2,4,8]},3,224,224]"
+        auto v = from_json_string(convert_to_json(dd_json));
+        std::vector<migraphx::shape::dynamic_dimension> dyn_dims;
+        std::transform(v.begin(), v.end(), std::back_inserter(dyn_dims), [&](auto x) {
+            if(x.is_object())
+                return from_value<migraphx::shape::dynamic_dimension>(x);
+            auto d = x.template to<std::size_t>();
+            return migraphx::shape::dynamic_dimension{d, d};
+        });
+        return dyn_dims;
+    }
+
+    static auto parse_dyn_dims_map(const std::vector<std::string>& param_dyn_dims)
+    {
+        // expecting vector of strings formatted like
+        // {"@param_name_0", "dd_json_0", "@param_name_1", "dd_json_1", ...}
+        std::unordered_map<std::string, std::vector<shape::dynamic_dimension>> map_dyn_input_dims;
+        std::string name = "";
+        for(auto&& x : param_dyn_dims)
+        {
+            if(x[0] == '@')
+            {
+                name = x.substr(1);
+            }
+            else
+            {
+                map_dyn_input_dims[name] = parse_dyn_dims_json(x);
+            }
+        }
+        return map_dyn_input_dims;
+    }
+
    static auto parse_output_names(const std::vector<std::string>& output_names_info)
    {
        std::vector<std::string> output_node_names;
@@ -158,13 +208,44 @@ struct loader
        return output_node_names;
    }

+    tf_options get_tf_options() const
+    {
+        auto map_input_dims    = parse_param_dims(param_dims);
+        auto output_node_names = parse_output_names(output_names);
+        tf_options options;
+        options.is_nhwc           = is_nhwc;
+        options.batch_size        = batch;
+        options.map_input_dims    = map_input_dims;
+        options.output_node_names = output_node_names;
+        return options;
+    }
+
+    onnx_options get_onnx_options() const
+    {
+        auto map_input_dims     = parse_param_dims(param_dims);
+        auto map_dyn_input_dims = parse_dyn_dims_map(dyn_param_dims);
+        onnx_options options;
+        if(default_dyn_dim.empty())
+        {
+            options.default_dim_value = batch;
+        }
+        else
+        {
+            auto v = from_json_string(convert_to_json(default_dyn_dim));
+            options.default_dyn_dim_value = from_value<migraphx::shape::dynamic_dimension>(v);
+        }
+        options.skip_unknown_operators = skip_unknown_operators;
+        options.print_program_on_error = true;
+        options.map_input_dims         = map_input_dims;
+        options.map_dyn_input_dims     = map_dyn_input_dims;
+        return options;
+    }
+
    program load()
    {
        program p;
        if(model.empty())
        {
-            auto map_input_dims    = parse_param_dims(param_dims);
-            auto output_node_names = parse_output_names(output_names);
            if(file_type.empty())
            {
                if(ends_with(file, ".onnx"))
@@ -179,16 +260,11 @@ struct loader
            std::cout << "Reading: " << file << std::endl;
            if(file_type == "onnx")
            {
-                onnx_options options;
-                options.default_dim_value      = batch;
-                options.skip_unknown_operators = skip_unknown_operators;
-                options.print_program_on_error = true;
-                options.map_input_dims         = map_input_dims;
-                p                              = parse_onnx(file, options);
+                p = parse_onnx(file, get_onnx_options());
            }
            else if(file_type == "tf")
            {
-                p = parse_tf(file, tf_options{is_nhwc, batch, map_input_dims, output_node_names});
+                p = parse_tf(file, get_tf_options());
            }
            else if(file_type == "json")
            {
@@ -289,14 +365,21 @@ struct program_params
        ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append(), ap.nargs(2));
    }

-    auto generate(const program& p, const target& t, bool offload)
+    auto generate(const program& p, const target& t, bool offload, unsigned batch)
    {
        parameter_map m;
+        auto param_shapes = p.get_parameter_shapes();
+        std::unordered_map<std::string, shape> static_param_shapes;
+        std::transform(
+            param_shapes.cbegin(),
+            param_shapes.cend(),
+            std::inserter(static_param_shapes, static_param_shapes.end()),
+            [&](const auto& x) { return std::make_pair(x.first, x.second.to_static(batch)); });
        for(auto&& s : fill0)
-            m[s] = fill_argument(p.get_parameter_shape(s), 0);
+            m[s] = fill_argument(static_param_shapes.at(s), 0);
        for(auto&& s : fill1)
-            m[s] = fill_argument(p.get_parameter_shape(s), 1);
+            m[s] = fill_argument(static_param_shapes.at(s), 1);
-        fill_param_map(m, p, t, offload);
+        fill_param_map(m, static_param_shapes, t, offload);
        return m;
    }
};
@@ -305,12 +388,12 @@ struct compiler_target
{
#ifdef HAVE_GPU
    std::string target_name = "gpu";
-#elif HAVE_CPU
+#elif defined(HAVE_CPU)
    std::string target_name = "cpu";
-#elif HAVE_FPGA
+#elif defined(HAVE_FPGA)
-    std::string target_name = "fpga"
+    std::string target_name = "fpga";
#else
-    std::string target_name = "ref"
+    std::string target_name = "ref";
#endif

    void parse(argument_parser& ap)
@@ -353,13 +436,18 @@ struct compiler
           {"--exhaustive-tune"},
           ap.help("Exhastively search for best tuning parameters for kernels"),
           ap.set_value(true));
+        ap(co.split_single_dyn_dim,
+           {"--split-single-dyn-dim"},
+           ap.help("If there is a single non-fixed dynamic dimension in the model, then split to "
+                   "static submodules"),
+           ap.set_value(true));
        ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
        ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(precision::int8));
    }

    auto params(const program& p)
    {
-        return parameters.generate(p, ct.get_target(), co.offload_copy);
+        return parameters.generate(p, ct.get_target(), co.offload_copy, l.batch);
    }

    program compile()
@@ -432,7 +520,7 @@ struct verify : command<verify>
        std::cout << p << std::endl;

        auto t = c.ct.get_target();
-        auto m = c.parameters.generate(p, t, true);
+        auto m = c.parameters.generate(p, t, true, c.l.batch);
        if(per_instruction)
        {
...
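To make the new dynamic-shape flags concrete, here is a hedged sketch of the value that `parse_dyn_dims_json` above accepts (the concrete numbers are illustrative only): JSON objects become true dynamic dimensions, while bare integers become fixed `{d, d}` dimensions.

// Sketch only: what the driver's --dyn-input-dim JSON parses into, for an
// NCHW parameter where just the batch dimension is dynamic:
auto dds = parse_dyn_dims_json("[{min:1,max:64,optimals:[1,2,4,8]},3,224,224]");
// dds[0] -> dynamic_dimension{1, 64, {1, 2, 4, 8}}
// dds[1] -> dynamic_dimension{3, 3}              (fixed)
// dds[2], dds[3] -> dynamic_dimension{224, 224}  (fixed)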
@@ -39,36 +39,25 @@ auto get_hash(const T& x)
    return std::hash<T>{}(x);
}

-parameter_map fill_param_map(parameter_map& m, const program& p, const target& t, bool offload)
+parameter_map fill_param_map(parameter_map& m,
+                             const std::unordered_map<std::string, shape>& param_shapes,
+                             const target& t,
+                             bool offload)
{
-    for(auto&& x : p.get_parameter_shapes())
+    for(auto&& x : param_shapes)
    {
        argument& arg = m[x.first];
        if(arg.empty())
+        {
+            assert(not x.second.dynamic());
            arg = generate_argument(x.second, get_hash(x.first));
+        }
        if(not offload)
            arg = t.copy_to(arg);
    }
    return m;
}

-parameter_map fill_param_map(parameter_map& m, const program& p, bool gpu)
-{
-    for(auto&& x : p.get_parameter_shapes())
-    {
-        argument& arg = m[x.first];
-        if(arg.empty())
-            arg = generate_argument(x.second, get_hash(x.first));
-#ifdef HAVE_GPU
-        if(gpu)
-            arg = gpu::to_gpu(arg);
-#else
-        (void)gpu;
-#endif
-    }
-    return m;
-}
-
parameter_map create_param_map(const program& p, const target& t, bool offload)
{
    parameter_map m;
...
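A hedged sketch of a call site for the refactored helper (the shape map and target are placeholders assumed to exist at the call site): callers now resolve any dynamic parameter shapes to static ones first, as `program_params::generate` does above, and pass the resulting map in.

// Sketch only: fill a parameter_map from pre-resolved static shapes.
// static_shapes (std::unordered_map<std::string, migraphx::shape>) and the
// target t are assumed to be provided by the caller.
migraphx::parameter_map m;
fill_param_map(m, static_shapes, t, /*offload=*/false); // asserts every shape is static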
@@ -30,8 +30,10 @@ namespace migraphx {
namespace driver {
inline namespace MIGRAPHX_INLINE_NS {

-parameter_map
-fill_param_map(parameter_map& m, const program& p, const target& t, bool offload = false);
+parameter_map fill_param_map(parameter_map& m,
+                             const std::unordered_map<std::string, shape>& param_shapes,
+                             const target& t,
+                             bool offload = false);
parameter_map create_param_map(const program& p, const target& t, bool offload = false);
parameter_map fill_param_map(parameter_map& m, const program& p, bool gpu);
...
@@ -74,6 +74,7 @@ static void create_pointwise_modules(module_pass_manager& mpm)
        std::unordered_map<instruction_ref, instruction_ref> param_map;
        std::vector<instruction_ref> pointwise_inputs;
        std::size_t i = 0;
+
        for(auto input : ins->inputs())
        {
            if(contains(param_map, input))
@@ -92,6 +93,10 @@ static void create_pointwise_modules(module_pass_manager& mpm)
            }
        }

+        // Don't create pointwise module if no inputs are detected
+        if(pointwise_inputs.empty())
+            continue;
+
        std::vector<instruction_ref> inputs;
        std::transform(ins->inputs().begin(),
                       ins->inputs().end(),
...
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/fuse_reduce.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/register_op.hpp>
#include <iterator>
#include <map>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct fused_reduce
{
std::vector<std::int64_t> axes{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.axes, "axes"));
}
shape compute_shape(const std::vector<shape>& inputs, std::vector<module_ref> mods) const
{
if(mods.size() != 1)
MIGRAPHX_THROW("should have one submodule.");
auto* sm = mods.front();
if(sm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("Only one output supported");
auto names = sm->get_parameter_names();
check_shapes{inputs, *this}.has(names.size()).same_ndims();
std::sort(names.begin(), names.end());
auto shapes = sm->get_parameter_shapes();
// Check dimension matches for each input
if(not equal(names, inputs, [&](const auto& name, const auto& input) {
return shapes.at(name).lens() == input.lens();
}))
MIGRAPHX_THROW("Dimension does not match the submodule.");
const auto& s = inputs.at(0);
auto lens = s.lens();
if(lens != sm->get_output_shapes().front().lens())
{
for(const auto& axis : axes)
{
lens[axis] = 1;
}
}
return shape::from_permutation(
sm->get_output_shapes().front().type(), lens, find_permutation(inputs));
}
std::string name() const { return "fused_reduce"; }
};
MIGRAPHX_REGISTER_OP(fused_reduce);
static std::unordered_map<instruction_ref, instruction_ref>
get_ins_param_map(const std::vector<instruction_ref>& inputs, const_module_ref sm)
{
std::unordered_map<instruction_ref, instruction_ref> result;
auto names = sm->get_parameter_names();
std::sort(names.begin(), names.end());
assert(names.size() == inputs.size());
std::transform(names.begin(),
names.end(),
inputs.begin(),
std::inserter(result, result.end()),
[&](const auto& name, auto input) {
return std::make_pair(input, sm->get_parameter(name));
});
return result;
}
static void insert_params(module_ref sm,
instruction_ref ins,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
auto n = sm->get_parameter_shapes().size();
for(auto input : ins->inputs())
{
if(contains(map_ins, input))
continue;
auto s = shape{input->get_shape().type(), input->get_shape().lens()};
map_ins[input] = sm->add_parameter("x" + std::to_string(n++), s);
}
}
static auto insert_ins_in_submodule(module_ref sm,
instruction_ref ins,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
insert_params(sm, ins, map_ins);
return sm->add_instructions({ins}, map_ins);
}
static auto insert_ins_in_submodule(module_ref sm, instruction_ref ins)
{
std::unordered_map<instruction_ref, instruction_ref> map_ins;
return insert_ins_in_submodule(sm, ins, map_ins);
}
static auto
insert_module_in_submodule(module_ref sm,
instruction_ref ins,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
insert_params(sm, ins, map_ins);
auto* m = ins->module_inputs().front();
auto param_map = get_ins_param_map(ins->inputs(), m);
for(auto&& [input, param] : param_map)
{
map_ins[param] = map_ins.at(input);
}
return sm->add_instructions(m, map_ins);
}
static std::vector<instruction_ref>
find_inputs(module_ref sm,
const module& parent,
const std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
std::vector<instruction_ref> result;
std::map<std::string, instruction_ref> names;
for(auto&& [input, param] : map_ins)
{
if(not sm->has_instruction(param))
continue;
if(param->name() != "@param")
continue;
if(not parent.has_instruction(input))
continue;
auto v = param->get_operator().to_value();
auto name = v.at("parameter").to<std::string>();
names[name] = input;
}
std::transform(names.begin(), names.end(), std::back_inserter(result), [](const auto& p) {
return p.second;
});
assert(result.size() == sm->get_parameter_shapes().size());
return result;
}
static void create_reduce_modules(module_pass_manager& mpm)
{
std::size_t n = 0;
for(auto ins : iterator_for(mpm.get_module()))
{
if(not ins->get_operator().attributes().get("reduce", false))
continue;
if(ins->inputs().size() != 1)
continue;
auto* rm =
mpm.create_module(mpm.get_module().name() + ":" + ins->name() + std::to_string(n++));
rm->set_bypass();
rm->add_return(insert_ins_in_submodule(rm, ins));
auto v = ins->get_operator().to_value();
mpm.get_module().replace_instruction(
ins, make_op("fused_reduce", {{"axes", v["axes"]}}), ins->inputs(), {rm});
}
}
template <class... Ms>
static auto match_broadcast(Ms... ms)
{
return match::skip(match::name("contiguous"))(
match::name("multibroadcast")(match::arg(0)(ms...), match::used_once()).bind("broadcast"));
}
template <class... Ms>
static auto any_input(Ms... ms)
{
return match::any_of[match::inputs()](match::any(ms...).bind("input"));
}
static auto match_broadcastable_input(const std::string& op, const std::string& name)
{
auto match_op = match::name(op)(match::used_once()).bind(name);
auto match_op_input = any_input(match_op, match::used_once());
auto broadcast_match_op_input = any_input(match_broadcast(match_op), match::used_once());
return match::any_of(match_op_input, broadcast_match_op_input);
}
namespace {
struct find_pointwise_reduce
{
auto matcher() const
{
return match::name("fused_reduce")(match_broadcastable_input("pointwise", "pointwise"));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto reduce = r.result;
auto input = r.instructions["pointwise"];
const auto* pm = input->module_inputs().front();
const auto* old_rm = reduce->module_inputs().front();
auto* rm = mpm.create_module(pm->name() + ":" + old_rm->name());
rm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
// Insert pointwise
auto rins = insert_ins_in_submodule(rm, input, map_ins).front();
map_ins[input] = rins;
if(contains(r.instructions, "broadcast"))
{
auto broadcast = r.instructions["broadcast"];
map_ins[broadcast] = insert_ins_in_submodule(rm, broadcast, map_ins).front();
}
// Insert fused_reduce
rm->add_return(insert_module_in_submodule(rm, reduce, map_ins));
auto new_inputs = find_inputs(rm, mpm.get_module(), map_ins);
mpm.get_module().replace_instruction(reduce, reduce->get_operator(), new_inputs, {rm});
}
};
struct find_reduce_pointwise
{
auto matcher() const
{
return match::name("pointwise")(match_broadcastable_input("fused_reduce", "reduce"));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto pw = r.result;
auto reduce = r.instructions["reduce"];
auto input = r.instructions["input"];
const auto* pm = pw->module_inputs().front();
const auto* old_rm = reduce->module_inputs().front();
auto* rm = mpm.create_module(old_rm->name() + ":" + pm->name());
rm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
// Copy module instructions
insert_module_in_submodule(rm, reduce, map_ins);
if(contains(r.instructions, "broadcast"))
{
auto broadcast = r.instructions["broadcast"];
map_ins[broadcast->inputs().front()] = rm->get_returns().front();
auto bout = insert_ins_in_submodule(rm, broadcast, map_ins);
map_ins[input] = bout.front();
}
else
{
map_ins[input] = rm->get_returns().front();
}
auto out = insert_ins_in_submodule(rm, pw, map_ins);
rm->replace_return(out);
auto new_inputs = find_inputs(rm, mpm.get_module(), map_ins);
mpm.get_module().replace_instruction(pw, reduce->get_operator(), new_inputs, {rm});
}
};
struct find_reduce_reduce
{
auto matcher() const
{
return match::name("fused_reduce")(match_broadcastable_input("fused_reduce", "reduce"));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto reduce1 = r.result;
auto reduce2 = r.instructions["reduce"];
auto input = r.instructions["input"];
if(reduce1->get_operator() != reduce2->get_operator())
return;
const auto* rm1 = reduce1->module_inputs().front();
const auto* rm2 = reduce2->module_inputs().front();
auto* rm = mpm.create_module(rm1->name() + ":" + rm2->name());
rm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
// Copy reduce1 instructions
insert_module_in_submodule(rm, reduce2, map_ins);
if(contains(r.instructions, "broadcast"))
{
auto broadcast = r.instructions["broadcast"];
map_ins[broadcast->inputs().front()] = rm->get_returns().front();
auto bout = insert_ins_in_submodule(rm, broadcast, map_ins);
map_ins[input] = bout.front();
}
else
{
map_ins[input] = rm->get_returns().front();
}
auto out = insert_module_in_submodule(rm, reduce1, map_ins);
rm->replace_return(out);
auto new_inputs = find_inputs(rm, mpm.get_module(), map_ins);
mpm.get_module().replace_instruction(reduce1, reduce1->get_operator(), new_inputs, {rm});
}
};
} // namespace
void fuse_reduce::apply(module_pass_manager& mpm) const
{
create_reduce_modules(mpm);
mpm.run_pass(dead_code_elimination{});
for(int i = 0; i < 4; i++)
{
match::find_matches(
mpm, find_reduce_pointwise{}, find_pointwise_reduce{}, find_reduce_reduce{});
mpm.run_pass(dead_code_elimination{});
}
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
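For orientation, a hedged sketch of how this pass would be invoked (the wrapper function is illustrative; `run_passes` is the existing pass-manager entry point): `create_reduce_modules` first outlines each reduction into a `fused_reduce` submodule, and the three matchers then repeatedly fold adjacent pointwise and reduce operations into it, with dead-code elimination cleaning up between iterations.

// Sketch only (assumed usage, not part of the commit):
#include <migraphx/fuse_reduce.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/pass_manager.hpp>

void run_fuse_reduce(migraphx::module& m)
{
    // Outline reductions into fused_reduce submodules, fuse neighbours, clean up
    migraphx::run_passes(m, {migraphx::fuse_reduce{}, migraphx::dead_code_elimination{}});
}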