"driver/conv_driver.cpp" did not exist on "c82b833d8e76094a3702046d81872132d5c4b15a"
Commit 15a7d96a authored by Paul's avatar Paul
Browse files

Merge from develop

parents 4c370d64 eb094e57
@@ -7,17 +7,27 @@ jobs:
     runs-on: ubuntu-latest
     steps:
     - name: Cancel Previous Runs
-      uses: styfle/cancel-workflow-action@0.6.0
+      uses: styfle/cancel-workflow-action@0.11.0
       with:
         access_token: ${{ github.token }}
   tidy:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: |
+        sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local
+        du . --max-depth=1 -h
+        ls -la
+        cd /usr/local/lib
+        echo $(pwd)
+        du . --max-depth=1 -h
+        ls -la
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -34,7 +44,7 @@ jobs:
         message("::set-output name=timestamp::${current_date}")
     - name: Cache files for tidy
-      uses: pat-s/always-upload-cache@v2.1.3
+      uses: pat-s/always-upload-cache@v3.0.11
       with:
         path: tidy-cache
         key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
@@ -61,12 +71,12 @@ jobs:
         make -j2 -k onnx-proto tf-proto tidy
   cppcheck:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -106,12 +116,12 @@ jobs:
         make -j2 cppcheck
   format:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     # In this step, this action saves a list of existing images,
     # the cache is created without them in the post run.
@@ -142,14 +152,14 @@ jobs:
         | xargs -n 1 -P 1 -I{} -t sh -c 'yapf {} | diff - {}'
   pyflakes:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.8
     - name: Install pyflakes
@@ -163,14 +173,14 @@ jobs:
         mypy tools/api.py
   licensing:
-    runs-on: ubuntu-18.04
+    runs-on: ubuntu-20.04
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: 3.8
     - name: run License Check
@@ -190,7 +200,6 @@ jobs:
     strategy:
       matrix:
         os:
-        - ubuntu-18.04
         - ubuntu-20.04
         configuration:
         - debug
@@ -199,16 +208,16 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
-        python-version: 3.6
+        python-version: 3.7
     - name: Cache dependencies
       # Ignore the failure of a step and avoid terminating the job.
       continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         # This path is specific to Ubuntu
         path: ${{ github.workspace }}/cget
@@ -287,7 +296,6 @@ jobs:
     strategy:
       matrix:
         os:
-        - ubuntu-18.04
         - ubuntu-20.04
         configuration:
         - debug
@@ -296,16 +304,16 @@ jobs:
     steps:
     - name: Free space
-      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android
-    - uses: actions/checkout@v2
+      run: sudo rm -rf /usr/local/android /usr/share/dotnet /usr/local/share/boost /opt/ghc /usr/local/share/chrom* /usr/share/swift /usr/local/julia* /usr/local/lib/android /usr/local/graalvm /usr/local/aws* /usr/local/lib/heroku
+    - uses: actions/checkout@v3
     - name: Set up Python
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
-        python-version: 3.6
+        python-version: 3.7
     - name: Cache dependencies
       # Ignore the failure of a step and avoid terminating the job.
       continue-on-error: true
-      uses: actions/cache@v2
+      uses: actions/cache@v3
       with:
         # This path is specific to Ubuntu
         path: ${{ github.workspace }}/cget
......
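Note on the tidy cache hunk above: keying the cache on a per-run timestamp while restoring by key prefix is what lets pat-s/always-upload-cache restore the newest previous cache and still upload a fresh one even when the job fails. A minimal sketch of that pattern, assuming a CMake-script step produces the timestamp (the step id, the timestamp format, and the restore-keys value are assumptions; only the message(...) line and the cache step appear in this hunk):

    - name: Get timestamp for cache key
      id: cache_timestamp
      shell: cmake -P {0}
      run: |
        # the UTC timestamp becomes the unique suffix of the cache key
        string(TIMESTAMP current_date "%Y-%m-%d-%H-%M-%S" UTC)
        message("::set-output name=timestamp::${current_date}")
    - name: Cache files for tidy
      uses: pat-s/always-upload-cache@v3.0.11
      with:
        path: tidy-cache
        key: tidy-cache-${{ steps.cache_timestamp.outputs.timestamp }}
        # any older tidy-cache-* entry can seed the restore
        restore-keys: tidy-cache-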
@@ -5,14 +5,14 @@ on:
     branches: [develop]
     types: [opened, synchronize, closed]
   schedule:
-    - cron: "0 5 * * 1-6"
+    - cron: "0 6 * * 1-6"
   workflow_dispatch:
     inputs:
       rocm_release:
         description: ROCm Version
         required: true
-        default: '5.2'
+        default: '5.3'
       performance_reports_repo:
         description: Result repository
         required: true
@@ -30,9 +30,9 @@ concurrency: "perftest-${{ github.head_ref || github.base_ref || 'schedule' }}"
 jobs:
   release:
-    uses: rocmsoftwareplatform/migraphx-benchmark/.github/workflows/perf-test.yml@main
+    uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/perf-test.yml@main
     with:
-      rocm_release: ${{ github.event.inputs.rocm_release || '5.2' }}
+      rocm_release: ${{ github.event.inputs.rocm_release || '5.3' }}
       result_number: ${{ github.event.inputs.result_number || '10' }}
       flags: ${{ github.event.inputs.flags || '-s' }}
       performance_reports_repo: ${{ github.event.inputs.performance_reports_repo || 'ROCmSoftwarePlatform/migraphx-reports' }}
......
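For reference, the updated schedule line fires at 06:00 UTC rather than 05:00; decoding the five cron fields (standard cron syntax, not part of the diff):

    # "0 6 * * 1-6"
    #  | | | | +--- day of week: 1-6, Monday through Saturday
    #  | | | +----- month: any
    #  | | +------- day of month: any
    #  | +--------- hour: 6 (06:00 UTC)
    #  +----------- minute: 0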
@@ -63,7 +63,7 @@ set(CMAKE_EXTRA_INCLUDE_FILES)
 include(ROCMSetupVersion)
-rocm_setup_version(VERSION 2.4)
+rocm_setup_version(VERSION 2.5)
 set(MIGRAPHX_SO_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR})
 option( BUILD_SHARED_LIBS "Build as a shared library" ON )
@@ -114,6 +114,7 @@ rocm_enable_clang_tidy(
         hicpp-signed-bitwise
         llvm-namespace-comment
         misc-*
+        -misc-confusable-identifiers
         modernize-*
         performance-*
         readability-*
......
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
 RUN dpkg --add-architecture i386
 # Add rocm repository
-RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.0.2/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.3/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
 # Install dependencies
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
@@ -71,10 +71,11 @@ RUN /download_models.sh && rm /download_models.sh
 # Install latest ccache version
 RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
-RUN cget -p $PREFIX install ccache@v4.1
+RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF
 # Install newer cmake for onnx runtime
-RUN cget -p /opt/cmake install kitware/cmake@v3.13.4
+ARG CMAKE_VERSION=3.24.2
+RUN cget -p /opt/cmake install -X binary https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz
 ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
 ARG ONNXRUNTIME_BRANCH=main
@@ -86,7 +87,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXRUNTIME_REPO}
 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
-RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@e8e77eb16be413d301ea8509726d47f265d9011f -DBUILD_MIXR_TARGET=On
+RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@c0723a7e50043d973cb73ae51dc30d36679ee7e5 -DBUILD_MIXR_TARGET=On
 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
......
@@ -21,9 +21,9 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 #####################################################################################
-pfultz2/rocm-recipes
+ROCmSoftwarePlatform/rocm-recipes
 facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cmake
-ccache@v4.1
+ccache@v4.1 -DENABLE_TESTING=OFF
 pcre,pfultz2/pcre@8.45 -H sha256:d6f7182602a775a7d500a0cedca6449af0400c6493951513046d17615ed0bf11
 danmar/cppcheck@2.9 -DHAVE_RULES=1
 RadeonOpenCompute/rocm-cmake@1ebf7e7bc61bb5e949c171562b421264065230a7 --build
......
@@ -29,6 +29,7 @@ See below for a comprehensive list of commands and option arguments, as well as
 | --tf | Load file as a tensorflow graph |
 | --migraphx | Load file as a migraphx graph |
 | --migraphx-json | Load file as a migraphx JSON graph |
+| --batch | Set batch size for the model |
 | --nhwc | Treat tensorflow format as nhwc |
 | --nchw | Treat tensorflow format as nchw |
 | --skip-unknown-operators | Skip unknown operators when parsing and continue to parse |
......
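As a quick illustration of the new option, a run such as the following would load the ONNX model with its batch dimension overridden (the model filename is a placeholder; perf compiles the model and reports per-instruction timings):

    migraphx-driver perf resnet50.onnx --batch 4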
@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
 RUN dpkg --add-architecture i386
 # Add rocm repository
-RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.0.2/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
+RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.3/ ubuntu main > /etc/apt/sources.list.d/rocm.list'
 # Install dependencies
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
......
@@ -2,7 +2,7 @@
 cxx = ${rocm_path}/llvm/bin/clang++
 cc = ${rocm_path}/llvm/bin/clang
 deps =
-    pfultz2/rocm-recipes
+    ROCmSoftwarePlatform/rocm-recipes
     -f requirements.txt
 [gh]
@@ -24,4 +24,4 @@ deps =
 define =
     CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
     CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
-    MIGRAPHX_ENABLE_CPU=On
\ No newline at end of file
+    MIGRAPHX_ENABLE_CPU=On
@@ -24,7 +24,7 @@
 google/protobuf@v3.11.0 -DCMAKE_POSITION_INDEPENDENT_CODE=On -X subdir -Dprotobuf_BUILD_TESTS=Off
 nlohmann/json@v3.8.0
 live-clones/blaze@v3.8 -X header -DHEADER_DIR=blaze -H sha256:d0ff011f47538285178908ea5f2cab46bb6a8f55b1edb6e03224a82dbc1a3212
-half,https://github.com/pfultz2/half/archive/1.12.0.tar.gz -X header -H sha256:0a08660b68abb176ebc2a0cdf8de46e3182a7f46c66443bb80dbfaaec98cf969
+half,https://github.com/ROCmSoftwarePlatform/half/archive/1.12.0.tar.gz -X header -H sha256:0a08660b68abb176ebc2a0cdf8de46e3182a7f46c66443bb80dbfaaec98cf969
 pybind/pybind11@d159a563383d10c821ba7b2a71905d1207db6de4 --build
 msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off
 sqlite3@3.17 -DCMAKE_POSITION_INDEPENDENT_CODE=On
......
@@ -55,6 +55,7 @@ add_library(migraphx
     insert_pad.cpp
     instruction.cpp
     json.cpp
+    layout_nhwc.cpp
     load_save.cpp
     make_op.cpp
     module.cpp
@@ -81,7 +82,6 @@ add_library(migraphx
     replace_allocate.cpp
     simplify_qdq.cpp
     sqlite.cpp
-    rewrite_batchnorm.cpp
     rewrite_gelu.cpp
     rewrite_pooling.cpp
     rewrite_quantization.cpp
@@ -115,7 +115,6 @@ register_migraphx_ops(
     as_shape
     atanh
     atan
-    batch_norm_inference
     broadcast
     capture
     ceil
@@ -146,6 +145,7 @@ register_migraphx_ops(
     if_op
     im2col
     isnan
+    layout
     leaky_relu
     less
     load
......
@@ -32,6 +32,7 @@
 #include <memory>
 #include <numeric>
 #include <exception>
+#include <array>
 #include <vector>
 #include <cassert>
 #include <iostream>
......
@@ -59,6 +59,8 @@ void auto_contiguous::apply(module& m) const
     auto last = std::prev(m.end());
     for(auto ins : iterator_for(m))
     {
+        if(ins->name() == "layout")
+            continue;
         // for last instruction that is NOT a return
         if(ins->outputs().empty() and ins != last)
             continue;
......
@@ -27,6 +27,7 @@
 #include <migraphx/algorithm.hpp>
 #include <migraphx/stringutils.hpp>
 #include <migraphx/instruction.hpp>
+#include <migraphx/ranges.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -43,6 +44,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 // In this case we need to broadcast the (:,:,1:,:) axis
 // of s0 plus the 1st dimension of s1 giving
 // output_lens = (3,2,7,5)
+//
 std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
                                                   std::vector<std::size_t> s1)
 {
@@ -50,25 +52,63 @@ std::vector<std::size_t> compute_broadcasted_lens(std::vector<std::size_t> s0,
         return s0;
     if(s0.size() > s1.size())
         s0.swap(s1);
     std::vector<std::size_t> out_lens(s1);
     auto offset = s1.size() - s0.size();
     std::transform(
         s0.begin(), s0.end(), s1.begin() + offset, out_lens.begin() + offset, [&](auto a, auto b) {
             if(a != b and a != 1 and b != 1)
             {
-                MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" + to_string_range(s0) + "} and {" +
-                               to_string_range(s1) + "} mismatch!");
+                MIGRAPHX_THROW("COMPUTE_BROADCASTLEN: shape {" + migraphx::to_string_range(s0) +
+                               "} and {" + migraphx::to_string_range(s1) + "} mismatch!");
             }
             return std::max(a, b);
         });
     return out_lens;
 }
+
+std::vector<shape::dynamic_dimension> compute_broadcasted_dyn_dims(shape s0, shape s1)
+{
+    // change both shapes to dynamic_dimension representation
+    s0 = s0.to_dynamic();
+    s1 = s1.to_dynamic();
+    if(s0.ndim() > s1.ndim())
+    {
+        std::swap(s0, s1);
+    }
+    auto offset = s1.ndim() - s0.ndim();
+    std::vector<shape::dynamic_dimension> out_dims(s1.dyn_dims());
+    shape::dynamic_dimension one_dyn_dim{1, 1, 0};
+    std::transform(
+        s0.dyn_dims().cbegin(),
+        s0.dyn_dims().cend(),
+        s1.dyn_dims().cbegin() + offset,
+        out_dims.begin() + offset,
+        [&](auto a, auto b) {
+            if(a == b)
+            {
+                return a;
+            }
+            else if(a == one_dyn_dim or b == one_dyn_dim)
+            {
+                // setting opt to 0, may need to be changed
+                return shape::dynamic_dimension{std::max(a.min, b.min), std::max(a.max, b.max), 0};
+            }
+            else
+            {
+                MIGRAPHX_THROW("COMPUTE_BROADCASTED_DYN_DIMS: dynamic shapes {" +
+                               migraphx::to_string_range(s0.dyn_dims()) + "} and {" +
+                               migraphx::to_string_range(s1.dyn_dims()) + "} mismatch!");
+            }
+        });
+    return out_dims;
+}
+
+// Compute the common (broadcasted) dimensions of a list of fixed shapes
 std::vector<std::size_t> compute_common_lens(const std::vector<shape>& shapes)
 {
     assert(not shapes.empty());
+    assert(
+        std::none_of(shapes.cbegin(), shapes.cend(), [](auto shape) { return shape.dynamic(); }));
     return transform_accumulate(shapes.begin() + 1,
                                 shapes.end(),
                                 shapes.front().lens(),
@@ -114,20 +154,63 @@ instruction_ref insert_common_op(module& m,
                                 const operation& op,
                                 std::vector<instruction_ref> inputs)
 {
-    auto common = common_shape(to_shapes(inputs));
-    std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
-        if(input->get_shape().lens() != common.lens())
-        {
-            input = m.insert_instruction(
-                ins, make_op("multibroadcast", {{"out_lens", common.lens()}}), input);
-        }
-        if(input->get_shape().type() != common.type())
-        {
-            input = m.insert_instruction(
-                ins, make_op("convert", {{"target_type", common.type()}}), input);
-        }
-        return input;
-    });
+    if(std::any_of(
+           inputs.cbegin(), inputs.cend(), [](auto input) { return input->get_shape().dynamic(); }))
+    {
+        // currently only handles the binary case
+        if(inputs.size() != 2)
+        {
+            MIGRAPHX_THROW("INSERT_COMMON_OP: not handled; " + migraphx::to_string(inputs.size()) +
+                           "inputs, only handle two inputs if any are dynamic shape");
+        }
+
+        auto c_type = compute_common_types(to_shapes(inputs));
+        auto c_dyn_dims =
+            compute_broadcasted_dyn_dims(inputs[0]->get_shape(), inputs[1]->get_shape());
+        // following should work for a static or dynamic shape
+        if(inputs[0]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[0] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[0],
+                inputs[1]);
+        }
+        if(inputs[1]->get_shape().dyn_dims() != c_dyn_dims)
+        {
+            inputs[1] = m.insert_instruction(
+                ins,
+                make_op("multibroadcast", {{"out_dyn_dims", to_value(c_dyn_dims)}}),
+                inputs[1],
+                inputs[0]);
+        }
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input->get_shape().type() != c_type)
+            {
+                input =
+                    m.insert_instruction(ins, make_op("convert", {{"target_type", c_type}}), input);
+            }
+            return input;
+        });
+    }
+    else
+    {
+        auto common = common_shape(to_shapes(inputs));
+        std::transform(inputs.begin(), inputs.end(), inputs.begin(), [&](auto input) {
+            if(input->get_shape().lens() != common.lens())
+            {
+                input = m.insert_instruction(
+                    ins, make_op("multibroadcast", {{"out_lens", common.lens()}}), input);
+            }
+            if(input->get_shape().type() != common.type())
+            {
+                input = m.insert_instruction(
+                    ins, make_op("convert", {{"target_type", common.type()}}), input);
+            }
+            return input;
+        });
+    }
     return m.insert_instruction(ins, op, inputs);
 }
......
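To make the pairwise rule in compute_broadcasted_dyn_dims above concrete, a worked example in the same {min, max, opt} notation the hunk uses (values chosen for illustration only; the surrounding shape construction is elided):

    // s0.dyn_dims() = { {1, 1, 0}, {3, 3, 0} }   // unit first dim, fixed second dim
    // s1.dyn_dims() = { {2, 8, 4}, {3, 3, 0} }   // first dim ranges 2..8 (opt 4)
    //
    // pair ({1,1,0}, {2,8,4}): one side equals one_dyn_dim, so the result is
    //     {max(1,2), max(1,8), 0} = {2, 8, 0}     // note opt is reset to 0
    // pair ({3,3,0}, {3,3,0}): equal, returned unchanged
    //
    // out_dims = { {2, 8, 0}, {3, 3, 0} }
    //
    // A pair like ({3,3,0}, {2,8,4}) would throw COMPUTE_BROADCASTED_DYN_DIMS,
    // since the dimensions are neither equal nor is either side {1, 1, 0}.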
@@ -74,9 +74,9 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
     auto x_main_module_19 = mmain->add_literal(migraphx::generate_literal(
         migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 18));
     auto x_main_module_20 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
-                               "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[4,"
-                               "4],use_dynamic_same_auto_pad:0}"),
+        migraphx::make_json_op(
+            "convolution",
+            "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[4,4]}"),
         x_0,
         x_main_module_19);
     auto x_main_module_21 = mmain->add_instruction(
@@ -90,9 +90,9 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
             "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
         x_main_module_23);
     auto x_main_module_25 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
-                               "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[1,"
-                               "1],use_dynamic_same_auto_pad:0}"),
+        migraphx::make_json_op(
+            "convolution",
+            "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[1,1]}"),
         x_main_module_24,
         x_main_module_17);
     auto x_main_module_26 = mmain->add_instruction(
@@ -106,9 +106,9 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
             "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
         x_main_module_28);
     auto x_main_module_30 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
-                               "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,"
-                               "1],use_dynamic_same_auto_pad:0}"),
+        migraphx::make_json_op(
+            "convolution",
+            "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}"),
         x_main_module_29,
         x_main_module_15);
     auto x_main_module_31 = mmain->add_instruction(
@@ -117,9 +117,9 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
         mmain->add_instruction(migraphx::make_op("add"), x_main_module_30, x_main_module_31);
     auto x_main_module_33 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_32);
     auto x_main_module_34 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
-                               "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,"
-                               "1],use_dynamic_same_auto_pad:0}"),
+        migraphx::make_json_op(
+            "convolution",
+            "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}"),
         x_main_module_33,
         x_main_module_13);
     auto x_main_module_35 = mmain->add_instruction(
@@ -128,9 +128,9 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
         mmain->add_instruction(migraphx::make_op("add"), x_main_module_34, x_main_module_35);
     auto x_main_module_37 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_36);
     auto x_main_module_38 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
-                               "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,"
-                               "1],use_dynamic_same_auto_pad:0}"),
+        migraphx::make_json_op(
+            "convolution",
+            "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}"),
         x_main_module_37,
         x_main_module_11);
     auto x_main_module_39 = mmain->add_instruction(
......
@@ -44,7 +44,6 @@
 #include <migraphx/propagate_constant.hpp>
 #include <migraphx/quantization.hpp>
 #include <migraphx/register_op.hpp>
-#include <migraphx/rewrite_batchnorm.hpp>
 #include <migraphx/simplify_algebra.hpp>
 #include <migraphx/simplify_reshapes.hpp>
 #include <migraphx/register_target.hpp>
@@ -221,7 +220,6 @@ struct loader
     {
         migraphx::run_passes(*p.get_main_module(),
                              {
-                                 migraphx::rewrite_batchnorm{},
                                  migraphx::eliminate_identity{},
                                  migraphx::dead_code_elimination{},
                                  migraphx::simplify_algebra{},
......
@@ -145,7 +145,7 @@ void verify_reduced(program p,
     auto* mm = p.get_main_module();
     auto last = std::prev(mm->end(), n + 1);
     mm->remove_instructions(last, mm->end());
-    std::cout << "Verify: " << std::endl;
+    std::cout << "Verify: " << n << std::endl;
     std::cout << p << std::endl;
     verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
 }
@@ -159,6 +159,7 @@ void verify_reduced_program(const program& p,
 {
     const auto* mm = p.get_main_module();
     auto n = std::distance(mm->begin(), mm->end());
+    std::cout << "Verify steps: " << n << std::endl;
     for(std::size_t i = 0; i < n; i++)
     {
         verify_reduced(p, i, t, options, quantize, inputs, tolerance);
......
@@ -42,6 +42,13 @@ static bool try_compute_shape(instruction_ref ins,
     try
     {
         shape new_shape = ins->get_operator().compute_shape(inputs, mods);
+        // Cannot tell if a dynamic shape will need to be made contiguous
+        if(new_shape.dynamic())
+        {
+            return false;
+        }
+
         // If the output shape is a standard shape, no need to try its output
         if(new_shape.standard())
         {
@@ -133,14 +140,20 @@ static void remove_contiguous(const std::string& op_name, module& m, F f)
         }
     }
-    // Perform evaluations in parallel
+    // Perform static contiguous evaluations in parallel
     std::vector<argument> literals(const_instructions.size());
     par_for(const_instructions.size(), 1, [&](const auto i) {
         auto c = op::contiguous{};
         auto prev = const_instructions[i]->inputs().front();
-        literals[i] = c.compute(c.compute_shape({prev->get_shape()}), {prev->eval()});
+        // compute the output contiguous shape from the previous instruction shape
+        shape computed_shape = c.compute_shape({prev->get_shape()});
+        const std::vector<argument>& prev_eval = {prev->eval()};
+        // prev_eval should not be used in make_compute_output_shape() as computed_shape is static
+        auto co_shape = make_compute_output_shape(pack(c, computed_shape, prev_eval));
+        literals[i] = c.compute(co_shape, prev_eval);
     });
+    // Replace static contiguous operations with a literal
     for(size_t i = 0; i < const_instructions.size(); i++)
     {
         auto l = m.add_literal(literals[i].get_shape(), literals[i].data());
......
@@ -39,13 +39,22 @@ static literal get_scalar(instruction_ref ins)
     if(ins->name() == "contiguous")
         return get_scalar(ins->inputs().front());
     const auto& s = ins->get_shape();
-    if(not(s.elements() == 1 or s.scalar()))
+    if(s.elements() != 1 && not(s.scalar()))
         return {};
     if(not ins->can_eval())
         return {};
     auto e = ins->eval();
     literal r{};
-    e.visit_at([&](auto x) { r = literal{x}; });
+    // needed for bool as visit_at invokes as() which promotes bool to int8
+    // Without this we'll break type checks for logical ops that are fused.
+    if(e.get_shape().type() == shape::bool_type)
+    {
+        r = literal{e.at<bool>()};
+    }
+    else
+    {
+        e.visit_at([&](auto x) { r = literal{x}; });
+    }
     return r;
 }
@@ -56,6 +65,8 @@ static void create_pointwise_modules(module_pass_manager& mpm)
     {
         if(not ins->get_operator().attributes().get("pointwise", false))
             continue;
+        if(ins->get_operator().name() == "layout")
+            continue;
         assert(ins->get_operator().attributes().contains("point_op"));
         auto* pm = mpm.create_module(mpm.get_module().name() + ":pointwise" + std::to_string(n++));
         pm->set_bypass();
......
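The bool branch added to get_scalar above prevents a silent type change; a minimal sketch of the failure mode it avoids (assuming e holds a one-element bool argument):

    literal r{};
    // visit_at goes through as(), which promotes bool to int8, so r would come
    // back as int8_type and a downstream fused logical op would fail its type check:
    e.visit_at([&](auto x) { r = literal{x}; });
    // the added branch instead reads the value with its bool type preserved:
    r = literal{e.at<bool>()};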