Unverified commit 18cf0435 authored by Umang Yadav, committed by GitHub

Merge branch 'develop' into blas_tuning

parents 12258d8f 3e8d7196
# Ignore everything
**
# Allow files and directories
!*.txt
!*.ini
!/tools/*.sh
!/doc/*.txt
!/test/onnx/.onnxrt-commit
name: migraphx
on: [push, pull_request]
on:
pull_request:
push:
branches:
- develop
- master
- 'release/**'
jobs:
cancel:
......@@ -17,40 +24,29 @@ jobs:
- name: Free space
run: |
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
du . --max-depth=1 -h
ls -la
cd /usr/local
du . --max-depth=1 -h
ls -la
cd /usr/local/lib
echo $(pwd)
du . --max-depth=1 -h
ls -la
- uses: actions/checkout@v3
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
# name: Docker Layer Caching2
- uses: jpribyl/action-docker-layer-caching@v0.1.1
- name: Docker layer cache
uses: jpribyl/action-docker-layer-caching@v0.1.1
with:
key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
restore-keys:
docker-layer-caching-migraphx-
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
- name: Prepare timestamp
id: cache_timestamp
shell: bash
run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
- name: Cache files for tidy
uses: pat-s/always-upload-cache@v3.0.11
- name: Restore cache files for tidy
uses: actions/cache/restore@v3
id: tidy_restore
with:
path: tidy-cache
key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
restore-keys: |
tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
tidy-cache-
key: tidy-cache-${{ github.ref }}
restore-keys: tidy-cache-
- name: Build the Docker image
run: docker build . --file hip-clang.docker --tag migraphx
......@@ -70,6 +66,25 @@ jobs:
..
make -j2 -k onnx-proto tf-proto tidy
# GH Actions cannot update an existing cache; as a workaround, clear the cache and then save it
- name: Clear tidy cache before saving
if: ${{ steps.tidy_restore.outputs.cache-hit }}
shell: bash
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh extension install actions/gh-actions-cache --pin v1.0.1
gh actions-cache delete ${{ steps.tidy_restore.outputs.cache-matched-key }} --confirm
continue-on-error: true
- name: Save cache files for tidy
uses: actions/cache/save@v3
if: always()
with:
path: tidy-cache
key: tidy-cache-${{ github.ref }}
cppcheck:
runs-on: ubuntu-20.04
......@@ -81,23 +96,22 @@ jobs:
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
- uses: jpribyl/action-docker-layer-caching@v0.1.1
- name: Docker layer cache
uses: jpribyl/action-docker-layer-caching@v0.1.1
with:
key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
restore-keys:
docker-layer-caching-migraphx-
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
- name: Prepare timestamp
id: cache_timestamp
shell: bash
run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
- name: Cache files for cppcheck
uses: pat-s/always-upload-cache@v2.1.3
- name: Restore cache files for cppcheck
id: cppcheck_restore
uses: actions/cache/restore@v3
with:
path: cppcheck-cache
key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ steps.cache_timestamp.outputs.timestamp }}
restore-keys: |
cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ steps.cache_timestamp.outputs.timestamp }}
cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-
key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
restore-keys: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-
- name: Build the Docker image
run: docker build . --file hip-clang.docker --tag migraphx
......@@ -114,6 +128,25 @@ jobs:
..
make -j2 cppcheck
# GH Actions cannot update an existing cache; as a workaround, clear the cache and then save it
- name: Clear cppcheck cache before saving
if: ${{ steps.cppcheck_restore.outputs.cache-hit }}
shell: bash
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh extension install actions/gh-actions-cache --pin v1.0.1
gh actions-cache delete ${{ steps.cppcheck_restore.outputs.cache-matched-key }} --confirm
continue-on-error: true
- name: Save cache files for cppcheck
uses: actions/cache/save@v3
if: always()
with:
path: cppcheck-cache
key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
format:
runs-on: ubuntu-20.04
......@@ -125,7 +158,12 @@ jobs:
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
- uses: jpribyl/action-docker-layer-caching@v0.1.1
- name: Docker layer cache
uses: jpribyl/action-docker-layer-caching@v0.1.1
with:
key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
restore-keys:
docker-layer-caching-migraphx-
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
......@@ -206,8 +244,13 @@ jobs:
- codecov
steps:
- name: Free space
run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
- name: Free space and install rbuild, lld
run: |
sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
sudo apt-get install -y lld
python -m pip install --upgrade pip
pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
......@@ -217,36 +260,25 @@ jobs:
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
uses: actions/cache@v3
id: deps_cache
with:
# This path is specific to Ubuntu
path: ${{ github.workspace }}/cget
# Look to see if there is a cache hit for the corresponding requirements file
key:
${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
${{ matrix.os }}-cget-4-
key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
restore-keys: ${{ matrix.os }}-cget-4-
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
rbuild prepare -d cget -s gh
sudo apt-get install -y lld
- name: Prepare timestamp
id: cache_timestamp
shell: bash
run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
if: steps.deps_cache.outputs.cache-hit != 'true'
run: rbuild prepare -d cget -s gh
- name: Cache files for ccache
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
uses: pat-s/always-upload-cache@v2.1.3
- name: Restore cache files for ccache
uses: actions/cache/restore@v3
id: ccache_restore
with:
path: ccache
key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
restore-keys: |
${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
${{ matrix.os }}-${{ matrix.configuration }}-ccache-
path: ${{ github.workspace }}/ccache
key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
restore-keys: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
- name: Build and test
env:
......@@ -266,6 +298,23 @@ jobs:
-DCMAKE_SHARED_LINKER_FLAGS='-fuse-ld=lld'
${{ github.workspace }}/cget/bin/ccache -s
# GH Actions cannot update an existing cache; as a workaround, clear the cache and then save it
- name: Clear ccache cache before saving
if: ${{ steps.ccache_restore.outputs.cache-hit }}
shell: bash
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh extension install actions/gh-actions-cache --pin v1.0.1
gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
- name: Save cache files for ccache
uses: actions/cache/save@v3
if: always()
with:
path: ${{ github.workspace }}/ccache
key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
- name: Upload code coverage
if: "matrix.configuration == 'codecov'"
env:
......@@ -309,6 +358,7 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: 3.7
- name: Cache dependencies
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
......@@ -317,9 +367,8 @@ jobs:
# This path is specific to Ubuntu
path: ${{ github.workspace }}/cget
# Look to see if there is a cache hit for the corresponding requirements file
key:
${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
${{ matrix.os }}-cget-4-
key: ${{ matrix.os }}-cget-4-${{ hashFiles('requirements.txt', 'dev-requirements.txt') }}
restore-keys: ${{ matrix.os }}-cget-4-
- name: Install dependencies
......@@ -328,22 +377,15 @@ jobs:
pip install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
rbuild prepare -d cget -s gh
sudo apt-get install -y lld
- name: Prepare timestamp
id: cache_timestamp
shell: bash
run: echo timestamp="$(date +'%Y-%m-%dT%H:%M:%S')" >> $GITHUB_OUTPUT
- name: Cache files for ccache
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
uses: pat-s/always-upload-cache@v2.1.3
- name: Restore cache files for ccache
id: ccache_restore_fpga
uses: actions/cache/restore@v3
with:
path: ccache
key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
restore-keys: |
${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ steps.cache_timestamp.outputs.timestamp }}
${{ matrix.os }}-${{ matrix.configuration }}-ccache-
path: ${{ github.workspace }}/ccache
key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
restore-keys: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-
- name: Build and test
env:
CMAKE_PREFIX_PATH: ${{ github.workspace }}/cget
......@@ -363,17 +405,36 @@ jobs:
-DMIGRAPHX_ENABLE_FPGA=On
${{ github.workspace }}/cget/bin/ccache -s
#- name: Upload code coverage
# if: "matrix.configuration == 'codecov'"
# env:
# CODECOV_TOKEN: "8545af1c-f90b-4345-92a5-0d075503ca56"
# run: |
# sudo apt-get install -y lcov
# cd build
# lcov --directory . --capture --output-file $(pwd)/coverage.info
# lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
# lcov --list $(pwd)/coverage.info
# curl -Os https://uploader.codecov.io/latest/linux/codecov
# chmod +x codecov
# ./codecov -t ${CODECOV_TOKEN}
# echo "Uploaded"
# Workaround: GH Actions cannot update an existing cache, so clear it and then save it again
- name: Clear ccache cache before saving
if: ${{ steps.ccache_restore_fpga.outputs.cache-hit }}
shell: bash
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh extension install actions/gh-actions-cache
gh actions-cache delete ${{ steps.ccache_restore_fpga.outputs.cache-matched-key }} --confirm
continue-on-error: true
- name: Save cache files for ccache
uses: actions/cache/save@v3
if: always()
with:
path: ${{ github.workspace }}/ccache
key: ${{ matrix.os }}-${{ matrix.configuration }}-ccache-${{ github.ref }}
#- name: Upload code coverage
# if: "matrix.configuration == 'codecov'"
# env:
# CODECOV_TOKEN: "8545af1c-f90b-4345-92a5-0d075503ca56"
# run: |
# sudo apt-get install -y lcov
# cd build
# lcov --directory . --capture --output-file $(pwd)/coverage.info
# lcov --remove $(pwd)/coverage.info '/usr/*' --output-file $(pwd)/coverage.info
# lcov --list $(pwd)/coverage.info
# curl -Os https://uploader.codecov.io/latest/linux/codecov
# chmod +x codecov
# ./codecov -t ${CODECOV_TOKEN}
# echo "Uploaded"
name: Cleanup caches of closed PR
on:
pull_request:
types:
- closed
jobs:
cleanup:
runs-on: ubuntu-latest
steps:
- name: Check out code
uses: actions/checkout@v3
- name: Cleanup
run: |
gh extension install actions/gh-actions-cache --pin v1.0.1
REPO=${{ github.repository }}
BRANCH="refs/pull/${{ github.event.pull_request.number }}/merge"
echo "Fetching list of cache key"
cacheKeysForPR=$(gh actions-cache list -R $REPO -B $BRANCH | cut -f 1 | tail -n +3)
## Don't fail the workflow if deleting a cache key fails.
set +e
echo "Deleting caches..."
for cacheKey in $cacheKeysForPR
do
gh actions-cache delete $cacheKey -R $REPO -B $BRANCH --confirm
done
echo "Done"
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
......@@ -10,12 +10,38 @@ on:
description: Repository for benchmark utils
required: true
default: 'ROCmSoftwarePlatform/migraphx-benchmark-utils'
base_image:
description: Base image for rocm Docker build
required: true
default: "rocm/dev-ubuntu-20.04"
docker_image:
description: Docker image name for rocm Docker build
required: true
default: "rocm-migraphx"
build_navi:
description: Build navi number
required: true
default: "0"
organization:
type: string
description: Organization based on which location of files will be different
required: true
default: "AMD"
overwrite:
type: boolean
description: Overwrite image if it already exists
required: true
jobs:
release:
uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/rocm-release.yml@main
with:
rocm_release: ${{ github.event.inputs.rocm_release }}
rocm_release: ${{ github.event.inputs.rocm_release || '5.1' }}
benchmark-utils_repo: ${{ github.event.inputs.benchmark-utils_repo || 'ROCmSoftwarePlatform/migraphx-benchmark-utils' }}
organization: ${{ github.event.inputs.organization || 'AMD' }}
base_image: ${{ github.event.inputs.base_image || 'rocm/dev-ubuntu-20.04' }}
docker_image: ${{ github.event.inputs.docker_image || 'rocm-migraphx' }}
build_navi: ${{ github.event.inputs.build_navi || '0' }}
overwrite: ${{ github.event.inputs.overwrite == 'true' }}
secrets:
gh_token: ${{ secrets.MIGRAPHX_BOT_TOKEN }}
name: Onnxruntime main weekly sync
on:
schedule:
- cron: '07 17 * * 5'
......
......@@ -110,7 +110,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@acb727b348086b58a7f261b32c0e4f0686a4c0ee -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@55c6ee66cc7502db7950693b3e845676cbf400b1 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off
ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
......
......@@ -56,7 +56,7 @@ build MIGraphX. The specific steps are as follows:
1) Install rocm-cmake, pip3, rocblas, and miopen-hip with the command
```
sudo apt update && sudo apt install -y rocm-cmake python3-pip rocblas miopen-hip
sudo apt install -y rocm-cmake python3-pip rocblas miopen-hip
```
2) Install [rbuild](https://github.com/RadeonOpenCompute/rbuild) (sudo may be required here.)
......@@ -68,14 +68,11 @@ pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
3) Build MIGraphX source code
```
rbuild build -d depend -B build --cxx=/opt/rocm/llvm/bin/clang++
rbuild build -d depend -B build
```
then all the prerequisites are in the folder `depend`, and MIGraphX is built in the `build` directory.
Note that ROCm 3.7 and later releases require Ubuntu 18.04 or later.
Instructions for upgrading are available at [Upgrade Ubuntu to 18.04](https://github.com/ROCmSoftwarePlatform/AMDMIGraphX/wiki/Upgrade-to-Ubuntu-18.04-for-ROCM3.7-or-later-releases).
Also note that you may see the error `rbuild: command not found`. This happens because rbuild is installed
to `$HOME/.local/bin`, which is not in `PATH` by default. Either add that folder to `PATH` or pass the
`--prefix /usr/local` option to the pip3 command when installing rbuild, as shown below.
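For example (a sketch, assuming pip3 installed rbuild into the default user location):
```
# Option 1: add the user-level install location to PATH
export PATH=$HOME/.local/bin:$PATH

# Option 2: install rbuild system-wide instead (sudo may be required)
pip3 install --prefix /usr/local https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz
```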
......@@ -89,7 +86,7 @@ If using this approach, we need to install the prerequisites, configure the cmak
For convenience, the prerequisites can be built automatically with rbuild as:
```
rbuild build -d depend --cxx=/opt/rocm/llvm/bin/clang++
rbuild prepare -d depend
```
then all the prerequisites are in the folder `depend`, and they can be used in the `cmake` configuration
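A sketch of how the `depend` folder can then be referenced when configuring (the build directory name is arbitrary and the source directory is assumed to be the current one):
```
cmake -S . -B build -DCMAKE_PREFIX_PATH=$PWD/depend
```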
......@@ -174,7 +171,6 @@ To install:
dpkg -i <path_to_deb_file>
```
### Calling MIGraphX APIs
To use MIGraphX's C/C++ API in your CMake project, set `CMAKE_PREFIX_PATH` to the MIGraphX
installation location and then do
......@@ -184,8 +180,24 @@ target_link_libraries(myApp migraphx::c)
```
Where `myApp` is the cmake target in your project.
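For example, a minimal configure-and-build sequence might look like the following (a sketch; the install prefix `/opt/rocm` is an assumption, substitute your MIGraphX installation location):
```
# Tell CMake where to find the MIGraphX package config, then build as usual
cmake -S . -B build -DCMAKE_PREFIX_PATH=/opt/rocm
cmake --build build
```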
## Building for development
Using rbuild, the dependencies for development can be installed with:
```
rbuild develop
```
This will install the dependencies for development into the `deps` directory and
configure `cmake` to use those dependencies in the `build` directory. These
directories can be changed by passing the `--deps-dir` and `--build-dir` flags
to the `rbuild` command:
```
rbuild develop --build-dir build_rocm_55 --deps-dir /home/user/deps_dir
```
### Building the documentation
## Building the documentation
HTML and PDF documentation can be built using:
......
......@@ -32,6 +32,10 @@ Disable fast math optimization
Perform an exhaustive search to find the fastest version of generated kernels for selected backend
.. option:: --split-single-dyn-dim
Enable the split single dynamic dimension pass
.. option:: --fp16
Quantize for fp16
......
......@@ -24,7 +24,7 @@ Load as MIGraphX JSON
.. option:: --batch [unsigned int] (Default: 1)
Set batch size for model
For a static model, sets the batch size. For a dynamic batch model, sets the batch size at runtime.
.. option:: --nhwc
......@@ -46,6 +46,14 @@ Trim instructions from the end (Default: 0)
Dim of a parameter (format: "@name d1 d2 dn")
.. option:: --dyn-input-dim [std::vector<std::string>]
Set dynamic dimensions of a parameter using JSON formatting (format "@name" "dynamic_dimension_json")
.. option:: --default-dyn-dim
Set the default dynamic dimension (format {min:x, max:y, optimals:[o1,o2,...]})
.. option:: --optimize, -O
Optimize when reading
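For example, the dynamic-dimension options above take a JSON-like string per parameter (an illustrative sketch; the driver command, model file, and parameter name ``data`` are placeholders)::

    migraphx-driver compile resnet50.onnx --dyn-input-dim "@data" "[{min:1, max:64, optimals:[1, 8, 64]}, 3, 224, 224]"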
......
......@@ -29,7 +29,7 @@ See below for a comprehensive list of commands and option arguments, as well as
| --tf | Load file as a tensorflow graph |
| --migraphx | Load file as a migraphx graph |
| --migraphx-json | Load file as a migraphx JSON graph |
| --batch | Set batch size for the model |
| --batch | For a static model, sets the batch size. For a dynamic batch model, sets the batch size at runtime. |
| --nhwc | Treat tensorflow format as nhwc |
| --nchw | Treat tensorflow format as nchw |
| --skip-unknown-operators | Skip unknown operators when parsing and continue to parse |
......@@ -44,12 +44,16 @@ See below for a comprehensive list of commands and option arguments, as well as
| --output \| -o | Output to file |
| --fill0 | Fill parameter with 0s |
| --fill1 | Fill parameter with 1s |
| --input-dim | Set static dimensions of a parameter |
| --dyn-input-dim | Set dynamic dimensions of a parameter |
| --default-dyn-dim | Set default dynamic dimension |
| --gpu | Compile on the gpu |
| --cpu | Compile on the cpu |
| --ref | Compile on the reference implementation |
| --enable-offload-copy | Enable implicit offload copying |
| --disable-fast-math | Disable fast math optimization |
| --exhaustive-tune | Enable exhaustive search to find fastest kernel |
| --split-single-dyn-dim | Enable split_single_dyn_dim compiler pass |
| --fp16 | Quantize for fp16 |
| --int8 | Quantize for int8 |
| --tolerance | Tolerance for errors |
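A typical invocation combining several of these flags might look like the following (illustrative only; the driver command name and model file are placeholders):
```
migraphx-driver perf resnet50.onnx --gpu --fp16 --batch 32 --exhaustive-tune
```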
......
......@@ -14,6 +14,7 @@ define =
CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
MIGRAPHX_ENABLE_CPU=On
BUILD_DEV=On
[develop]
cxx = ${rocm_path}/llvm/bin/clang++
......@@ -25,3 +26,4 @@ define =
CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
MIGRAPHX_ENABLE_CPU=On
BUILD_DEV=On
......@@ -50,6 +50,7 @@ add_library(migraphx
env.cpp
file_buffer.cpp
fuse_pointwise.cpp
fuse_reduce.cpp
generate.cpp
inline_module.cpp
insert_pad.cpp
......
......@@ -148,10 +148,8 @@ shape common_shape(const std::vector<shape>& shapes)
return {compute_common_types(shapes), compute_common_lens(shapes)};
}
instruction_ref insert_common_op(module& m,
instruction_ref ins,
const operation& op,
std::vector<instruction_ref> inputs)
std::vector<instruction_ref>
insert_common_args(module& m, instruction_ref ins, std::vector<instruction_ref> inputs)
{
if(std::any_of(
inputs.cbegin(), inputs.cend(), [](auto input) { return input->get_shape().dynamic(); }))
......@@ -210,7 +208,20 @@ instruction_ref insert_common_op(module& m,
return input;
});
}
return m.insert_instruction(ins, op, inputs);
return inputs;
}
std::vector<instruction_ref> add_common_args(module& m, std::vector<instruction_ref> inputs)
{
return insert_common_args(m, m.end(), std::move(inputs));
}
instruction_ref insert_common_op(module& m,
instruction_ref ins,
const operation& op,
std::vector<instruction_ref> inputs)
{
return m.insert_instruction(ins, op, insert_common_args(m, ins, std::move(inputs)));
}
instruction_ref add_common_op(module& m, const operation& op, std::vector<instruction_ref> inputs)
......
......@@ -106,6 +106,13 @@ cpp_generator::function& cpp_generator::function::set_generic_types(const module
return *this;
}
cpp_generator::function& cpp_generator::function::add_generic_param(const std::string& pname)
{
params.push_back({pname, "T" + pname});
tparams.push_back("class T" + pname);
return *this;
}
struct cpp_generator_impl
{
std::stringstream fs{};
......@@ -182,7 +189,8 @@ std::string cpp_generator::generate_point_op(const operation& op,
std::string cpp_generator::str() const { return impl->fs.str(); }
cpp_generator::function cpp_generator::generate_module(const module& m)
cpp_generator::function cpp_generator::generate_module(const module& m,
const generate_module_callback& g)
{
function f;
auto name = transform_string(m.name(), [](char c) {
......@@ -195,13 +203,7 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
if(ins->name() == "@literal")
return shape::cpp_type(ins->get_shape().type()) + "(" +
ins->get_literal().to_string() + ")";
std::vector<std::string> args;
std::transform(ins->inputs().begin(),
ins->inputs().end(),
std::back_inserter(args),
[&](auto i) { return names.at(i); });
auto s = this->generate_point_op(ins->get_operator(), args);
auto s = g(ins, names);
if(impl->fresult)
return impl->fresult(ins->get_shape()) + '(' + s + ')';
else
......@@ -210,6 +212,24 @@ cpp_generator::function cpp_generator::generate_module(const module& m)
return f;
}
std::vector<std::string>
cpp_generator::to_args(const std::vector<instruction_ref>& inputs,
const std::unordered_map<instruction_ref, std::string>& names)
{
std::vector<std::string> args;
std::transform(inputs.begin(), inputs.end(), std::back_inserter(args), [&](auto i) {
return names.at(i);
});
return args;
}
cpp_generator::function cpp_generator::generate_module(const module& m)
{
return this->generate_module(m, [&](auto ins, const auto& names) {
return this->generate_point_op(ins->get_operator(), to_args(ins->inputs(), names));
});
}
std::string cpp_generator::create_function(const cpp_generator::function& f)
{
impl->function_count++;
......@@ -218,6 +238,8 @@ std::string cpp_generator::create_function(const cpp_generator::function& f)
std::string name = f.name.empty() ? "f" + std::to_string(impl->function_count) : f.name;
impl->fs << join_strings(f.attributes, " ") << " " << f.return_type << " " << name;
char delim = '(';
if(f.params.empty())
impl->fs << delim;
for(auto&& p : f.params)
{
impl->fs << delim << p.type << " " << p.name;
......
......@@ -148,13 +148,21 @@ struct value_parser
template <MIGRAPHX_REQUIRES(not std::is_enum<T>{} and not is_multi_value<T>{})>
static T apply(const std::string& x)
{
T result;
std::stringstream ss;
ss.str(x);
ss >> result;
if(ss.fail())
throw std::runtime_error("Failed to parse '" + x + "' as " + type_name<T>::apply());
return result;
// return std::string values as-is; stream extraction would stop at the first whitespace
if constexpr(std::is_same<T, std::string>{})
{
return x;
}
else
{
T result;
std::stringstream ss;
ss.str(x);
ss >> result;
if(ss.fail())
throw std::runtime_error("Failed to parse '" + x + "' as " + type_name<T>::apply());
return result;
}
}
template <MIGRAPHX_REQUIRES(std::is_enum<T>{} and not is_multi_value<T>{})>
......
......@@ -33,6 +33,7 @@
#include <migraphx/tf.hpp>
#include <migraphx/onnx.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/convert_to_json.hpp>
#include <migraphx/load_save.hpp>
#include <migraphx/json.hpp>
#include <migraphx/version.h>
......@@ -68,7 +69,9 @@ struct loader
bool brief = false;
std::string output_type;
std::string output;
std::string default_dyn_dim;
std::vector<std::string> param_dims;
std::vector<std::string> dyn_param_dims;
std::vector<std::string> output_names;
void parse(argument_parser& ap)
......@@ -83,7 +86,11 @@ struct loader
ap(file_type, {"--tf"}, ap.help("Load as tensorflow"), ap.set_value("tf"));
ap(file_type, {"--migraphx"}, ap.help("Load as MIGraphX"), ap.set_value("migraphx"));
ap(file_type, {"--migraphx-json"}, ap.help("Load as MIGraphX JSON"), ap.set_value("json"));
ap(batch, {"--batch"}, ap.help("Set batch size for model"));
ap(batch,
{"--batch"},
ap.help("For a static model, sets default_dim_value size (commonly batch size). For a "
"dynamic batch model, sets the batch "
"size at runtime."));
ap(is_nhwc, {"--nhwc"}, ap.help("Treat tensorflow format as nhwc"), ap.set_value(true));
ap(skip_unknown_operators,
{"--skip-unknown-operators"},
......@@ -96,7 +103,16 @@ struct loader
ap.help("Dim of a parameter (format: \"@name d1 d2 dn\")"),
ap.append(),
ap.nargs(2));
ap(dyn_param_dims,
{"--dyn-input-dim"},
ap.help("Dynamic dimensions of a parameter (format: \"@name_1\" \"[{min:x, max:y, "
"optimals:[o1,o2,...]}, dim2,dim3, ...]\", \"@name_2\", ... You can supply a "
"single integer value for a dimension to specify it as fixed."),
ap.append(),
ap.nargs(2));
ap(default_dyn_dim,
{"--default-dyn-dim"},
ap.help("Default dynamic dimension (format: \"{min:x, max:y, optimals:[o1,o2]}\")."));
ap(output_names,
{"--output-names"},
ap.help("Names of node output (format: \"name_1 name_2 name_n\")"),
......@@ -147,6 +163,40 @@ struct loader
return map_input_dims;
}
static auto parse_dyn_dims_json(const std::string& dd_json)
{
// expecting a json string like "[{min:1,max:64,optimals:[1,2,4,8]},3,224,224]"
auto v = from_json_string(convert_to_json(dd_json));
std::vector<migraphx::shape::dynamic_dimension> dyn_dims;
std::transform(v.begin(), v.end(), std::back_inserter(dyn_dims), [&](auto x) {
if(x.is_object())
return from_value<migraphx::shape::dynamic_dimension>(x);
auto d = x.template to<std::size_t>();
return migraphx::shape::dynamic_dimension{d, d};
});
return dyn_dims;
}
static auto parse_dyn_dims_map(const std::vector<std::string>& param_dyn_dims)
{
// expecting vector of strings formatted like
// {"@param_name_0", "dd_json_0", "@param_name_1", "dd_json_1", ...}
std::unordered_map<std::string, std::vector<shape::dynamic_dimension>> map_dyn_input_dims;
std::string name = "";
for(auto&& x : param_dyn_dims)
{
if(x[0] == '@')
{
name = x.substr(1);
}
else
{
map_dyn_input_dims[name] = parse_dyn_dims_json(x);
}
}
return map_dyn_input_dims;
}
static auto parse_output_names(const std::vector<std::string>& output_names_info)
{
std::vector<std::string> output_node_names;
......@@ -158,13 +208,44 @@ struct loader
return output_node_names;
}
tf_options get_tf_options() const
{
auto map_input_dims = parse_param_dims(param_dims);
auto output_node_names = parse_output_names(output_names);
tf_options options;
options.is_nhwc = is_nhwc;
options.batch_size = batch;
options.map_input_dims = map_input_dims;
options.output_node_names = output_node_names;
return options;
}
onnx_options get_onnx_options() const
{
auto map_input_dims = parse_param_dims(param_dims);
auto map_dyn_input_dims = parse_dyn_dims_map(dyn_param_dims);
onnx_options options;
if(default_dyn_dim.empty())
{
options.default_dim_value = batch;
}
else
{
auto v = from_json_string(convert_to_json(default_dyn_dim));
options.default_dyn_dim_value = from_value<migraphx::shape::dynamic_dimension>(v);
}
options.skip_unknown_operators = skip_unknown_operators;
options.print_program_on_error = true;
options.map_input_dims = map_input_dims;
options.map_dyn_input_dims = map_dyn_input_dims;
return options;
}
program load()
{
program p;
if(model.empty())
{
auto map_input_dims = parse_param_dims(param_dims);
auto output_node_names = parse_output_names(output_names);
if(file_type.empty())
{
if(ends_with(file, ".onnx"))
......@@ -179,16 +260,11 @@ struct loader
std::cout << "Reading: " << file << std::endl;
if(file_type == "onnx")
{
onnx_options options;
options.default_dim_value = batch;
options.skip_unknown_operators = skip_unknown_operators;
options.print_program_on_error = true;
options.map_input_dims = map_input_dims;
p = parse_onnx(file, options);
p = parse_onnx(file, get_onnx_options());
}
else if(file_type == "tf")
{
p = parse_tf(file, tf_options{is_nhwc, batch, map_input_dims, output_node_names});
p = parse_tf(file, get_tf_options());
}
else if(file_type == "json")
{
......@@ -289,14 +365,21 @@ struct program_params
ap(fill1, {"--fill1"}, ap.help("Fill parameter with 1s"), ap.append(), ap.nargs(2));
}
auto generate(const program& p, const target& t, bool offload)
auto generate(const program& p, const target& t, bool offload, unsigned batch)
{
parameter_map m;
auto param_shapes = p.get_parameter_shapes();
std::unordered_map<std::string, shape> static_param_shapes;
std::transform(
param_shapes.cbegin(),
param_shapes.cend(),
std::inserter(static_param_shapes, static_param_shapes.end()),
[&](const auto& x) { return std::make_pair(x.first, x.second.to_static(batch)); });
for(auto&& s : fill0)
m[s] = fill_argument(p.get_parameter_shape(s), 0);
m[s] = fill_argument(static_param_shapes.at(s), 0);
for(auto&& s : fill1)
m[s] = fill_argument(p.get_parameter_shape(s), 1);
fill_param_map(m, p, t, offload);
m[s] = fill_argument(static_param_shapes.at(s), 1);
fill_param_map(m, static_param_shapes, t, offload);
return m;
}
};
......@@ -305,12 +388,12 @@ struct compiler_target
{
#ifdef HAVE_GPU
std::string target_name = "gpu";
#elif HAVE_CPU
#elif defined(HAVE_CPU)
std::string target_name = "cpu";
#elif HAVE_FPGA
std::string target_name = "fpga"
#elif defined(HAVE_FPGA)
std::string target_name = "fpga";
#else
std::string target_name = "ref"
std::string target_name = "ref";
#endif
void parse(argument_parser& ap)
......@@ -353,13 +436,18 @@ struct compiler
{"--exhaustive-tune"},
ap.help("Exhastively search for best tuning parameters for kernels"),
ap.set_value(true));
ap(co.split_single_dyn_dim,
{"--split-single-dyn-dim"},
ap.help("If there is a single non-fixed dynamic dimension in the model, then split to "
"static submodules"),
ap.set_value(true));
ap(quantize, {"--fp16"}, ap.help("Quantize for fp16"), ap.set_value(precision::fp16));
ap(quantize, {"--int8"}, ap.help("Quantize for int8"), ap.set_value(precision::int8));
}
auto params(const program& p)
{
return parameters.generate(p, ct.get_target(), co.offload_copy);
return parameters.generate(p, ct.get_target(), co.offload_copy, l.batch);
}
program compile()
......@@ -432,7 +520,7 @@ struct verify : command<verify>
std::cout << p << std::endl;
auto t = c.ct.get_target();
auto m = c.parameters.generate(p, t, true);
auto m = c.parameters.generate(p, t, true, c.l.batch);
if(per_instruction)
{
......
......@@ -39,36 +39,25 @@ auto get_hash(const T& x)
return std::hash<T>{}(x);
}
parameter_map fill_param_map(parameter_map& m, const program& p, const target& t, bool offload)
parameter_map fill_param_map(parameter_map& m,
const std::unordered_map<std::string, shape>& param_shapes,
const target& t,
bool offload)
{
for(auto&& x : p.get_parameter_shapes())
for(auto&& x : param_shapes)
{
argument& arg = m[x.first];
if(arg.empty())
{
assert(not x.second.dynamic());
arg = generate_argument(x.second, get_hash(x.first));
}
if(not offload)
arg = t.copy_to(arg);
}
return m;
}
parameter_map fill_param_map(parameter_map& m, const program& p, bool gpu)
{
for(auto&& x : p.get_parameter_shapes())
{
argument& arg = m[x.first];
if(arg.empty())
arg = generate_argument(x.second, get_hash(x.first));
#ifdef HAVE_GPU
if(gpu)
arg = gpu::to_gpu(arg);
#else
(void)gpu;
#endif
}
return m;
}
parameter_map create_param_map(const program& p, const target& t, bool offload)
{
parameter_map m;
......
......@@ -30,8 +30,10 @@ namespace migraphx {
namespace driver {
inline namespace MIGRAPHX_INLINE_NS {
parameter_map
fill_param_map(parameter_map& m, const program& p, const target& t, bool offload = false);
parameter_map fill_param_map(parameter_map& m,
const std::unordered_map<std::string, shape>& param_shapes,
const target& t,
bool offload = false);
parameter_map create_param_map(const program& p, const target& t, bool offload = false);
parameter_map fill_param_map(parameter_map& m, const program& p, bool gpu);
......
......@@ -74,6 +74,7 @@ static void create_pointwise_modules(module_pass_manager& mpm)
std::unordered_map<instruction_ref, instruction_ref> param_map;
std::vector<instruction_ref> pointwise_inputs;
std::size_t i = 0;
for(auto input : ins->inputs())
{
if(contains(param_map, input))
......@@ -92,6 +93,10 @@ static void create_pointwise_modules(module_pass_manager& mpm)
}
}
// Don't create pointwise module if no inputs are detected
if(pointwise_inputs.empty())
continue;
std::vector<instruction_ref> inputs;
std::transform(ins->inputs().begin(),
ins->inputs().end(),
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/fuse_reduce.hpp>
#include <migraphx/pass_manager.hpp>
#include <migraphx/dead_code_elimination.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/program.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/check_shapes.hpp>
#include <migraphx/matcher.hpp>
#include <migraphx/register_op.hpp>
#include <iterator>
#include <map>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
struct fused_reduce
{
std::vector<std::int64_t> axes{};
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.axes, "axes"));
}
shape compute_shape(const std::vector<shape>& inputs, std::vector<module_ref> mods) const
{
if(mods.size() != 1)
MIGRAPHX_THROW("should have one submodule.");
auto* sm = mods.front();
if(sm->get_output_shapes().size() != 1)
MIGRAPHX_THROW("Only one output supported");
auto names = sm->get_parameter_names();
check_shapes{inputs, *this}.has(names.size()).same_ndims();
std::sort(names.begin(), names.end());
auto shapes = sm->get_parameter_shapes();
// Check dimension matches for each input
if(not equal(names, inputs, [&](const auto& name, const auto& input) {
return shapes.at(name).lens() == input.lens();
}))
MIGRAPHX_THROW("Dimenstion does not match the submodule.");
const auto& s = inputs.at(0);
auto lens = s.lens();
if(lens != sm->get_output_shapes().front().lens())
{
for(const auto& axis : axes)
{
lens[axis] = 1;
}
}
return shape::from_permutation(
sm->get_output_shapes().front().type(), lens, find_permutation(inputs));
}
std::string name() const { return "fused_reduce"; }
};
MIGRAPHX_REGISTER_OP(fused_reduce);
static std::unordered_map<instruction_ref, instruction_ref>
get_ins_param_map(const std::vector<instruction_ref>& inputs, const_module_ref sm)
{
std::unordered_map<instruction_ref, instruction_ref> result;
auto names = sm->get_parameter_names();
std::sort(names.begin(), names.end());
assert(names.size() == inputs.size());
std::transform(names.begin(),
names.end(),
inputs.begin(),
std::inserter(result, result.end()),
[&](const auto& name, auto input) {
return std::make_pair(input, sm->get_parameter(name));
});
return result;
}
static void insert_params(module_ref sm,
instruction_ref ins,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
auto n = sm->get_parameter_shapes().size();
for(auto input : ins->inputs())
{
if(contains(map_ins, input))
continue;
auto s = shape{input->get_shape().type(), input->get_shape().lens()};
map_ins[input] = sm->add_parameter("x" + std::to_string(n++), s);
}
}
static auto insert_ins_in_submodule(module_ref sm,
instruction_ref ins,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
insert_params(sm, ins, map_ins);
return sm->add_instructions({ins}, map_ins);
}
static auto insert_ins_in_submodule(module_ref sm, instruction_ref ins)
{
std::unordered_map<instruction_ref, instruction_ref> map_ins;
return insert_ins_in_submodule(sm, ins, map_ins);
}
static auto
insert_module_in_submodule(module_ref sm,
instruction_ref ins,
std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
insert_params(sm, ins, map_ins);
auto* m = ins->module_inputs().front();
auto param_map = get_ins_param_map(ins->inputs(), m);
for(auto&& [input, param] : param_map)
{
map_ins[param] = map_ins.at(input);
}
return sm->add_instructions(m, map_ins);
}
static std::vector<instruction_ref>
find_inputs(module_ref sm,
const module& parent,
const std::unordered_map<instruction_ref, instruction_ref>& map_ins)
{
std::vector<instruction_ref> result;
std::map<std::string, instruction_ref> names;
for(auto&& [input, param] : map_ins)
{
if(not sm->has_instruction(param))
continue;
if(param->name() != "@param")
continue;
if(not parent.has_instruction(input))
continue;
auto v = param->get_operator().to_value();
auto name = v.at("parameter").to<std::string>();
names[name] = input;
}
std::transform(names.begin(), names.end(), std::back_inserter(result), [](const auto& p) {
return p.second;
});
assert(result.size() == sm->get_parameter_shapes().size());
return result;
}
static void create_reduce_modules(module_pass_manager& mpm)
{
std::size_t n = 0;
for(auto ins : iterator_for(mpm.get_module()))
{
if(not ins->get_operator().attributes().get("reduce", false))
continue;
if(ins->inputs().size() != 1)
continue;
auto* rm =
mpm.create_module(mpm.get_module().name() + ":" + ins->name() + std::to_string(n++));
rm->set_bypass();
rm->add_return(insert_ins_in_submodule(rm, ins));
auto v = ins->get_operator().to_value();
mpm.get_module().replace_instruction(
ins, make_op("fused_reduce", {{"axes", v["axes"]}}), ins->inputs(), {rm});
}
}
template <class... Ms>
static auto match_broadcast(Ms... ms)
{
return match::skip(match::name("contiguous"))(
match::name("multibroadcast")(match::arg(0)(ms...), match::used_once()).bind("broadcast"));
}
template <class... Ms>
static auto any_input(Ms... ms)
{
return match::any_of[match::inputs()](match::any(ms...).bind("input"));
}
static auto match_broadcastable_input(const std::string& op, const std::string& name)
{
auto match_op = match::name(op)(match::used_once()).bind(name);
auto match_op_input = any_input(match_op, match::used_once());
auto broadcast_match_op_input = any_input(match_broadcast(match_op), match::used_once());
return match::any_of(match_op_input, broadcast_match_op_input);
}
namespace {
struct find_pointwise_reduce
{
auto matcher() const
{
return match::name("fused_reduce")(match_broadcastable_input("pointwise", "pointwise"));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto reduce = r.result;
auto input = r.instructions["pointwise"];
const auto* pm = input->module_inputs().front();
const auto* old_rm = reduce->module_inputs().front();
auto* rm = mpm.create_module(pm->name() + ":" + old_rm->name());
rm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
// Insert pointwise
auto rins = insert_ins_in_submodule(rm, input, map_ins).front();
map_ins[input] = rins;
if(contains(r.instructions, "broadcast"))
{
auto broadcast = r.instructions["broadcast"];
map_ins[broadcast] = insert_ins_in_submodule(rm, broadcast, map_ins).front();
}
// Insert fused_reduce
rm->add_return(insert_module_in_submodule(rm, reduce, map_ins));
auto new_inputs = find_inputs(rm, mpm.get_module(), map_ins);
mpm.get_module().replace_instruction(reduce, reduce->get_operator(), new_inputs, {rm});
}
};
struct find_reduce_pointwise
{
auto matcher() const
{
return match::name("pointwise")(match_broadcastable_input("fused_reduce", "reduce"));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto pw = r.result;
auto reduce = r.instructions["reduce"];
auto input = r.instructions["input"];
const auto* pm = pw->module_inputs().front();
const auto* old_rm = reduce->module_inputs().front();
auto* rm = mpm.create_module(old_rm->name() + ":" + pm->name());
rm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
// Copy module instructions
insert_module_in_submodule(rm, reduce, map_ins);
if(contains(r.instructions, "broadcast"))
{
auto broadcast = r.instructions["broadcast"];
map_ins[broadcast->inputs().front()] = rm->get_returns().front();
auto bout = insert_ins_in_submodule(rm, broadcast, map_ins);
map_ins[input] = bout.front();
}
else
{
map_ins[input] = rm->get_returns().front();
}
auto out = insert_ins_in_submodule(rm, pw, map_ins);
rm->replace_return(out);
auto new_inputs = find_inputs(rm, mpm.get_module(), map_ins);
mpm.get_module().replace_instruction(pw, reduce->get_operator(), new_inputs, {rm});
}
};
struct find_reduce_reduce
{
auto matcher() const
{
return match::name("fused_reduce")(match_broadcastable_input("fused_reduce", "reduce"));
}
void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{
auto reduce1 = r.result;
auto reduce2 = r.instructions["reduce"];
auto input = r.instructions["input"];
if(reduce1->get_operator() != reduce2->get_operator())
return;
const auto* rm1 = reduce1->module_inputs().front();
const auto* rm2 = reduce2->module_inputs().front();
auto* rm = mpm.create_module(rm1->name() + ":" + rm2->name());
rm->set_bypass();
std::unordered_map<instruction_ref, instruction_ref> map_ins;
// Copy reduce1 instructions
insert_module_in_submodule(rm, reduce2, map_ins);
if(contains(r.instructions, "broadcast"))
{
auto broadcast = r.instructions["broadcast"];
map_ins[broadcast->inputs().front()] = rm->get_returns().front();
auto bout = insert_ins_in_submodule(rm, broadcast, map_ins);
map_ins[input] = bout.front();
}
else
{
map_ins[input] = rm->get_returns().front();
}
auto out = insert_module_in_submodule(rm, reduce1, map_ins);
rm->replace_return(out);
auto new_inputs = find_inputs(rm, mpm.get_module(), map_ins);
mpm.get_module().replace_instruction(reduce1, reduce1->get_operator(), new_inputs, {rm});
}
};
} // namespace
void fuse_reduce::apply(module_pass_manager& mpm) const
{
create_reduce_modules(mpm);
mpm.run_pass(dead_code_elimination{});
for(int i = 0; i < 4; i++)
{
match::find_matches(
mpm, find_reduce_pointwise{}, find_pointwise_reduce{}, find_reduce_reduce{});
mpm.run_pass(dead_code_elimination{});
}
}
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx