Commit c2bafa5d authored by Alan Turner
Browse files

Merge branch 'ck-flash-attn' of...

Merge branch 'ck-flash-attn' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into ck-flash-attn
parents 370b2cce 250d3c87
...@@ -7,7 +7,7 @@ jobs: ...@@ -7,7 +7,7 @@ jobs:
benchmark: benchmark:
uses: ROCmSoftwarePlatform/actions/.github/workflows/benchmarks.yml@main uses: ROCmSoftwarePlatform/actions/.github/workflows/benchmarks.yml@main
with: with:
rocm_version: 5.2 rocm_version: 5.7
script_repo: migraphx-benchmark/benchmark-utils script_repo: migraphx-benchmark/benchmark-utils
result_path: /usr/share/migraphx/test-results result_path: /usr/share/migraphx/test-results
result_repo: ROCmSoftwarePlatform/comparison-results result_repo: ROCmSoftwarePlatform/comparison-results
......
...@@ -137,11 +137,11 @@ jobs: ...@@ -137,11 +137,11 @@ jobs:
-DMIGRAPHX_ENABLE_GPU=On \ -DMIGRAPHX_ENABLE_GPU=On \
-DMIGRAPHX_ENABLE_CPU=On \ -DMIGRAPHX_ENABLE_CPU=On \
-DMIGRAPHX_ENABLE_FPGA=On \ -DMIGRAPHX_ENABLE_FPGA=On \
-DMIGRAPHX_ENABLE_MLIR=On \
-DBUILD_DEV=On \ -DBUILD_DEV=On \
-DROCM_ENABLE_GH_ANNOTATIONS=On \ -DROCM_ENABLE_GH_ANNOTATIONS=On \
-DCLANG_TIDY_DEPEND_ON_TARGET=Off \ -DCLANG_TIDY_DEPEND_ON_TARGET=Off \
-DCLANG_TIDY_CACHE=/data/tidy-cache \ -DCLANG_TIDY_CACHE=/data/tidy-cache \
-DGPU_TARGETS=gfx908 \
.. ..
make -j$(nproc) -k onnx-proto tf-proto tidy make -j$(nproc) -k onnx-proto tf-proto tidy
...@@ -191,6 +191,7 @@ jobs: ...@@ -191,6 +191,7 @@ jobs:
-DCPPCHECK_BUILD_DIR=/data/cppcheck-cache \ -DCPPCHECK_BUILD_DIR=/data/cppcheck-cache \
-DBUILD_DEV=On \ -DBUILD_DEV=On \
-DROCM_ENABLE_GH_ANNOTATIONS=On \ -DROCM_ENABLE_GH_ANNOTATIONS=On \
-DGPU_TARGETS=gfx908 \
.. ..
make -j$(nproc) cppcheck make -j$(nproc) cppcheck
...@@ -227,6 +228,7 @@ jobs: ...@@ -227,6 +228,7 @@ jobs:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -279,6 +281,7 @@ jobs: ...@@ -279,6 +281,7 @@ jobs:
-DBUILD_DEV=On \ -DBUILD_DEV=On \
-DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/ccache \
-DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/ccache \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/ccache \
-DGPU_TARGETS=gfx908 \
.. ..
make -j$(nproc) tests driver make -j$(nproc) tests driver
...@@ -305,6 +308,7 @@ jobs: ...@@ -305,6 +308,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -335,6 +339,7 @@ jobs: ...@@ -335,6 +339,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -375,6 +380,7 @@ jobs: ...@@ -375,6 +380,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -493,6 +499,7 @@ jobs: ...@@ -493,6 +499,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
......
...@@ -12,7 +12,7 @@ on: ...@@ -12,7 +12,7 @@ on:
rocm_release: rocm_release:
description: ROCm Version description: ROCm Version
required: true required: true
default: '5.6' default: '5.7'
performance_reports_repo: performance_reports_repo:
description: Repository where performance reports are stored description: Repository where performance reports are stored
required: true required: true
...@@ -50,7 +50,7 @@ jobs: ...@@ -50,7 +50,7 @@ jobs:
release: release:
uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/perf-test.yml@main uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/perf-test.yml@main
with: with:
rocm_release: ${{ github.event.inputs.rocm_release || '5.6' }} rocm_release: ${{ github.event.inputs.rocm_release || '5.7' }}
result_number: ${{ github.event.inputs.result_number || '10' }} result_number: ${{ github.event.inputs.result_number || '10' }}
flags: ${{ github.event.inputs.flags || '-r' }} flags: ${{ github.event.inputs.flags || '-r' }}
performance_reports_repo: ${{ github.event.inputs.performance_reports_repo || 'ROCmSoftwarePlatform/migraphx-reports' }} performance_reports_repo: ${{ github.event.inputs.performance_reports_repo || 'ROCmSoftwarePlatform/migraphx-reports' }}
......
...@@ -80,3 +80,6 @@ docs/html ...@@ -80,3 +80,6 @@ docs/html
cmake-build*/ cmake-build*/
build*/ build*/
# Recommended location to install rbuild dependencies from README.md
depend
...@@ -162,6 +162,8 @@ rocm_enable_clang_tidy( ...@@ -162,6 +162,8 @@ rocm_enable_clang_tidy(
-cppcoreguidelines-pro-type-vararg -cppcoreguidelines-pro-type-vararg
-cppcoreguidelines-special-member-functions -cppcoreguidelines-special-member-functions
-cppcoreguidelines-virtual-class-destructor -cppcoreguidelines-virtual-class-destructor
-cppcoreguidelines-avoid-capture-default-when-capturing-this
-cppcoreguidelines-rvalue-reference-param-not-moved
-google-readability-* -google-readability-*
-google-runtime-int -google-runtime-int
-google-runtime-references -google-runtime-references
......
...@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && ...@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl &&
curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.7/ focal main > /etc/apt/sources.list.d/rocm.list'
# From docs.amd.com for installing rocm. Needed to install properly # From docs.amd.com for installing rocm. Needed to install properly
RUN sh -c "echo 'Package: *\nPin: release o=repo.radeon.com\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600" RUN sh -c "echo 'Package: *\nPin: release o=repo.radeon.com\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600"
...@@ -101,10 +101,6 @@ RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cma ...@@ -101,10 +101,6 @@ RUN cget -p $PREFIX install facebook/zstd@v1.4.5 -X subdir -DCMAKE_DIR=build/cma
RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF RUN cget -p $PREFIX install ccache@v4.1 -DENABLE_TESTING=OFF
RUN cget -p /opt/cmake install kitware/cmake@v3.26.4 RUN cget -p /opt/cmake install kitware/cmake@v3.26.4
# Install MLIR
ADD mlir-requirements.txt /mlir-requirements.txt
RUN cget -p /usr/local install -f /mlir-requirements.txt
COPY ./test/onnx/.onnxrt-commit / COPY ./test/onnx/.onnxrt-commit /
ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
......
/**
 * Returns the semicolon-separated list of GPU architectures that the build
 * stages in this pipeline pass to CMake through -DGPU_TARGETS.
 *
 * @return the GPU target list as a single String, e.g. "gfx908;gfx90a;..."
 */
def getgputargets() {
    // Declared with `def` so the value is a local of this call; a bare
    // assignment in a Groovy script would be stored in the script binding
    // and become an implicit global visible to every stage.
    def targets = "gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx1102"
    return targets
}
// def rocmtestnode(variant, name, body, args, pre) { // def rocmtestnode(variant, name, body, args, pre) {
def rocmtestnode(Map conf) { def rocmtestnode(Map conf) {
...@@ -107,11 +111,13 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build -> ...@@ -107,11 +111,13 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
stage('hipRTC Debug') { stage('hipRTC Debug') {
def sanitizers = "undefined" def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}" def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true) def gpu_targets = getgputargets()
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On -DGPU_TARGETS='${gpu_targets}'", gpu_debug: true)
} }
}, clang_release: rocmnode('cdna') { cmake_build -> }, clang_release: rocmnode('mi100+') { cmake_build ->
stage('Hip Clang Release') { stage('Hip Clang Release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release") def gpu_targets = getgputargets()
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DGPU_TARGETS='${gpu_targets}'")
stash includes: 'build/*.deb', name: 'migraphx-package' stash includes: 'build/*.deb', name: 'migraphx-package'
} }
// }, hidden_symbols: rocmnode('cdna') { cmake_build -> // }, hidden_symbols: rocmnode('cdna') { cmake_build ->
...@@ -120,7 +126,8 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build -> ...@@ -120,7 +126,8 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
// } // }
}, all_targets_debug : rocmnode('cdna') { cmake_build -> }, all_targets_debug : rocmnode('cdna') { cmake_build ->
stage('All targets Release') { stage('All targets Release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On") def gpu_targets = getgputargets()
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On -DGPU_TARGETS='${gpu_targets}'")
} }
}, mlir_debug: rocmnode('cdna') { cmake_build -> }, mlir_debug: rocmnode('cdna') { cmake_build ->
stage('MLIR Debug') { stage('MLIR Debug') {
...@@ -129,20 +136,23 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build -> ...@@ -129,20 +136,23 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
// Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS. // Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS.
def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}" def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}" def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'") def gpu_targets = getgputargets()
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DGPU_TARGETS='${gpu_targets}'")
} }
} }
}, ck_release: rocmnode('mi100+') { cmake_build -> }, ck_hiprtc: rocmnode('mi100+') { cmake_build ->
stage('CK Release') { stage('CK hipRTC') {
withEnv(['MIGRAPHX_ENABLE_CK=1', 'MIGRAPHX_TUNE_CK=1']) { withEnv(['MIGRAPHX_ENABLE_CK=1', 'MIGRAPHX_TUNE_CK=1']) {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release") def gpu_targets = getgputargets()
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_USE_HIPRTC=On -DGPU_TARGETS='${gpu_targets}'")
} }
} }
}, clang_asan: rocmnode('nogpu') { cmake_build -> }, clang_asan: rocmnode('nogpu') { cmake_build ->
stage('Clang ASAN') { stage('Clang ASAN') {
def sanitizers = "undefined,address" def sanitizers = "undefined,address"
def debug_flags = "-g -O2 -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}" def debug_flags = "-g -O2 -fno-omit-frame-pointer -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=Off -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'") def gpu_targets = getgputargets()
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_GPU=Off -DMIGRAPHX_ENABLE_CPU=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DGPU_TARGETS='${gpu_targets}'")
} }
}//, clang_release_navi: rocmnode('navi21') { cmake_build -> }//, clang_release_navi: rocmnode('navi21') { cmake_build ->
// stage('HIP Clang Release Navi') { // stage('HIP Clang Release Navi') {
...@@ -159,13 +169,14 @@ def onnxnode(name, body) { ...@@ -159,13 +169,14 @@ def onnxnode(name, body) {
} }
} }
rocmtest onnx: onnxnode('cdna') { cmake_build -> rocmtest onnx: onnxnode('mi100+') { cmake_build ->
stage("Onnx runtime") { stage("Onnx runtime") {
sh ''' sh '''
apt install half apt install half
#ls -lR #ls -lR
md5sum ./build/*.deb md5sum ./build/*.deb
dpkg -i ./build/*.deb dpkg -i ./build/*.deb
env
cd /onnxruntime && ./build_and_test_onnxrt.sh cd /onnxruntime && ./build_and_test_onnxrt.sh
''' '''
} }
......
...@@ -21,7 +21,7 @@ charset-normalizer==3.1.0 ...@@ -21,7 +21,7 @@ charset-normalizer==3.1.0
# via requests # via requests
click==8.1.3 click==8.1.3
# via sphinx-external-toc # via sphinx-external-toc
cryptography==41.0.3 cryptography==41.0.4
# via pyjwt # via pyjwt
deprecated==1.2.13 deprecated==1.2.13
# via pygithub # via pygithub
...@@ -87,7 +87,7 @@ requests==2.28.2 ...@@ -87,7 +87,7 @@ requests==2.28.2
# via # via
# pygithub # pygithub
# sphinx # sphinx
rocm-docs-core>=0.20.0 rocm-docs-core==0.24.2
# via -r requirements.in # via -r requirements.in
smmap==5.0.0 smmap==5.0.0
# via gitdb # via gitdb
......
...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro ...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro
std::vector<float> results_vector(64); std::vector<float> results_vector(64);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify::verify_range(results_vector, sol)); EXPECT(migraphx::verify::verify_rms_range(results_vector, sol));
An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU. An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU.
By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target. By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target.
......
...@@ -50,9 +50,17 @@ Runs reference and CPU or GPU implementations and checks outputs for consistency ...@@ -50,9 +50,17 @@ Runs reference and CPU or GPU implementations and checks outputs for consistency
.. include:: ./driver/compile.rst .. include:: ./driver/compile.rst
.. option:: --tolerance [double] .. option:: --rms-tol [double]
Tolerance for errors (Default: 80) Tolerance for RMS error (Default: 0.001)
.. option:: --atol [double]
Tolerance for elementwise absolute difference (Default: 0.001)
.. option:: --rtol [double]
Tolerance for elementwise relative difference (Default: 0.001)
.. option:: -i, --per-instruction .. option:: -i, --per-instruction
......
...@@ -55,7 +55,9 @@ See below for a comprehensive list of commands and option arguments, as well as ...@@ -55,7 +55,9 @@ See below for a comprehensive list of commands and option arguments, as well as
| --exhaustive-tune | Enable exhaustive search to find fastest kernel | | --exhaustive-tune | Enable exhaustive search to find fastest kernel |
| --fp16 | Quantize for fp16 | | --fp16 | Quantize for fp16 |
| --int8 | Quantize for int8 | | --int8 | Quantize for int8 |
| --tolerance | Tolerance for errors | | --rms-tol | Tolerance for the RMS error (Default: 0.001) |
| --atol | Tolerance for elementwise absolute difference (Default: 0.001) |
| --rtol | Tolerance for elementwise relative difference (Default: 0.001) |
| --per-instruction \| -i | Verify each instruction | | --per-instruction \| -i | Verify each instruction |
| --reduce \| -r | Reduce program and verify | | --reduce \| -r | Reduce program and verify |
| --iterations \| -n | Number of iterations to run for perf report | | --iterations \| -n | Number of iterations to run for perf report |
......
...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local ...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386 RUN dpkg --add-architecture i386
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.7/ focal main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies # Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
...@@ -60,6 +60,3 @@ RUN pip3 install cmake==3.22.1 ...@@ -60,6 +60,3 @@ RUN pip3 install cmake==3.22.1
COPY ./tools/install_prereqs.sh / COPY ./tools/install_prereqs.sh /
RUN /install_prereqs.sh /usr/local / && rm /install_prereqs.sh RUN /install_prereqs.sh /usr/local / && rm /install_prereqs.sh
# Install MLIR
ADD mlir-requirements.txt /mlir-requirements.txt
RUN cget -p /usr/local install -f /mlir-requirements.txt
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
ROCmSoftwarePlatform/rocMLIR@2c519c48eaa278d13e6c40bc0941119826d71512 -DBUILD_FAT_LIBROCKCOMPILER=On
...@@ -29,3 +29,12 @@ define = ...@@ -29,3 +29,12 @@ define =
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
MIGRAPHX_ENABLE_CPU=On MIGRAPHX_ENABLE_CPU=On
BUILD_DEV=On BUILD_DEV=On
[cibuild]
cxx = ${rocm_path}/llvm/bin/clang++
cc = ${rocm_path}/llvm/bin/clang
deps =
-f dev-requirements.txt
define =
CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
...@@ -29,3 +29,4 @@ pybind/pybind11@d159a563383d10c821ba7b2a71905d1207db6de4 --build ...@@ -29,3 +29,4 @@ pybind/pybind11@d159a563383d10c821ba7b2a71905d1207db6de4 --build
msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off
sqlite3@3.17 -DCMAKE_POSITION_INDEPENDENT_CODE=On sqlite3@3.17 -DCMAKE_POSITION_INDEPENDENT_CODE=On
ROCmSoftwarePlatform/composable_kernel@4b0b327b81668978249fd9b6eb1c35214e7d78ea -DCK_BUILD_JIT_LIB=On -DCMAKE_POSITION_INDEPENDENT_CODE=On ROCmSoftwarePlatform/composable_kernel@4b0b327b81668978249fd9b6eb1c35214e7d78ea -DCK_BUILD_JIT_LIB=On -DCMAKE_POSITION_INDEPENDENT_CODE=On
ROCmSoftwarePlatform/rocMLIR@a48dfb1f163fb0b38369e73e580968b72e85b594 -DBUILD_FAT_LIBROCKCOMPILER=On
...@@ -197,6 +197,7 @@ register_migraphx_ops( ...@@ -197,6 +197,7 @@ register_migraphx_ops(
reduce_sum reduce_sum
relu relu
reshape reshape
reshape_lazy
reverse reverse
rnn rnn
rnn_last_cell_output rnn_last_cell_output
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -536,13 +536,19 @@ struct params : command<params> ...@@ -536,13 +536,19 @@ struct params : command<params>
struct verify : command<verify> struct verify : command<verify>
{ {
compiler c; compiler c;
double tolerance = 80; migraphx::verify::tolerance tols;
bool per_instruction = false; bool per_instruction = false;
bool reduce = false; bool reduce = false;
void parse(argument_parser& ap) void parse(argument_parser& ap)
{ {
c.parse(ap); c.parse(ap);
ap(tolerance, {"--tolerance"}, ap.help("Tolerance for errors")); ap(tols.rms_tol, {"--rms-tol"}, ap.help("Tolerance for the RMS error (Default: 0.001)"));
ap(tols.atol,
{"--atol"},
ap.help("Tolerance for the elementwise absolute difference (Default: 0.001)"));
ap(tols.rtol,
{"--rtol"},
ap.help("Tolerance for the elementwise relative difference (Default: 0.001)"));
ap(per_instruction, ap(per_instruction,
{"-i", "--per-instruction"}, {"-i", "--per-instruction"},
ap.help("Verify each instruction"), ap.help("Verify each instruction"),
...@@ -567,15 +573,15 @@ struct verify : command<verify> ...@@ -567,15 +573,15 @@ struct verify : command<verify>
if(per_instruction) if(per_instruction)
{ {
verify_instructions(p, t, c.co, quantize, tolerance); verify_instructions(p, t, c.co, quantize, tols);
} }
else if(reduce) else if(reduce)
{ {
verify_reduced_program(p, t, c.co, quantize, m, tolerance); verify_reduced_program(p, t, c.co, quantize, m, tols);
} }
else else
{ {
verify_program(c.l.file, p, t, c.co, quantize, m, tolerance); verify_program(c.l.file, p, t, c.co, quantize, m, tols);
} }
} }
}; };
......
...@@ -77,24 +77,24 @@ void verify_program(const std::string& name, ...@@ -77,24 +77,24 @@ void verify_program(const std::string& name,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
auto x = run_ref(p, inputs); auto ref_outs = run_ref(p, inputs);
auto y = run_target(p, t, options, quantize, inputs); auto target_outs = run_target(p, t, options, quantize, inputs);
std::size_t output_num = x.size(); std::size_t output_num = ref_outs.size();
for(std::size_t i = 0; i < output_num; ++i) for(std::size_t i = 0; i < output_num; ++i)
{ {
if(x[i].get_shape().type() != y[i].get_shape().type() or if(ref_outs[i].get_shape().type() != target_outs[i].get_shape().type() or
x[i].get_shape().lens() != y[i].get_shape().lens()) ref_outs[i].get_shape().lens() != target_outs[i].get_shape().lens())
{ {
std::cout << "FAILED: " << name << std::endl; std::cout << "FAILED: " << name << std::endl;
std::cout << "Shape mismatch {" << x[i].get_shape() << "} != {" << y[i].get_shape() std::cout << "Shape mismatch {" << ref_outs[i].get_shape() << "} != {"
<< "}" << std::endl; << target_outs[i].get_shape() << "}" << std::endl;
} }
else else
{ {
verify_args(name, x[i], y[i], tolerance); verify_args(name, target_outs[i], verify::expected{ref_outs[i]}, tols);
} }
} }
} }
...@@ -103,7 +103,7 @@ void verify_instructions(const program& prog, ...@@ -103,7 +103,7 @@ void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize, precision quantize,
double tolerance) verify::tolerance tols)
{ {
const auto* mm_prog = prog.get_main_module(); const auto* mm_prog = prog.get_main_module();
for(auto&& ins : (*mm_prog)) for(auto&& ins : (*mm_prog))
...@@ -134,8 +134,7 @@ void verify_instructions(const program& prog, ...@@ -134,8 +134,7 @@ void verify_instructions(const program& prog,
{ {
std::cout << "Verify: " << ins.name() << std::endl; std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program( verify_program(ins.name(), p, t, options, quantize, create_param_map(p, false), tols);
ins.name(), p, t, options, quantize, create_param_map(p, false), tolerance);
} }
catch(...) catch(...)
{ {
...@@ -151,7 +150,7 @@ void verify_reduced(program p, ...@@ -151,7 +150,7 @@ void verify_reduced(program p,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
auto* mm = p.get_main_module(); auto* mm = p.get_main_module();
auto last = std::prev(mm->end(), n); auto last = std::prev(mm->end(), n);
...@@ -160,7 +159,7 @@ void verify_reduced(program p, ...@@ -160,7 +159,7 @@ void verify_reduced(program p,
std::cout << p << std::endl; std::cout << p << std::endl;
try try
{ {
verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance); verify_program(std::to_string(n), p, t, options, quantize, inputs, tols);
} }
catch(const std::exception& e) catch(const std::exception& e)
{ {
...@@ -174,7 +173,7 @@ void verify_reduced_program(const program& p, ...@@ -174,7 +173,7 @@ void verify_reduced_program(const program& p,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
const auto* mm = p.get_main_module(); const auto* mm = p.get_main_module();
auto n = std::distance(mm->begin(), mm->end()); auto n = std::distance(mm->begin(), mm->end());
...@@ -187,7 +186,7 @@ void verify_reduced_program(const program& p, ...@@ -187,7 +186,7 @@ void verify_reduced_program(const program& p,
std::cout << "Skip: " << i << std::endl; std::cout << "Skip: " << i << std::endl;
continue; continue;
} }
verify_reduced(p, i, t, options, quantize, inputs, tolerance); verify_reduced(p, i, t, options, quantize, inputs, tols);
} }
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "precision.hpp" #include "precision.hpp"
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/verify.hpp>
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
...@@ -37,18 +38,18 @@ void verify_program(const std::string& name, ...@@ -37,18 +38,18 @@ void verify_program(const std::string& name,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 100); verify::tolerance tols = verify::tolerance{});
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
double tolerance = 80); verify::tolerance tols = verify::tolerance{});
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 80); verify::tolerance tols = verify::tolerance{});
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace driver } // namespace driver
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment