Unverified Commit 07bef2a0 authored by Ted Themistokleous's avatar Ted Themistokleous Committed by GitHub
Browse files

Merge branch 'develop' into enable_navi_32_ci

parents fc60486e dcc7b0a5
...@@ -7,7 +7,7 @@ jobs: ...@@ -7,7 +7,7 @@ jobs:
benchmark: benchmark:
uses: ROCmSoftwarePlatform/actions/.github/workflows/benchmarks.yml@main uses: ROCmSoftwarePlatform/actions/.github/workflows/benchmarks.yml@main
with: with:
rocm_version: 5.2 rocm_version: 5.7
script_repo: migraphx-benchmark/benchmark-utils script_repo: migraphx-benchmark/benchmark-utils
result_path: /usr/share/migraphx/test-results result_path: /usr/share/migraphx/test-results
result_repo: ROCmSoftwarePlatform/comparison-results result_repo: ROCmSoftwarePlatform/comparison-results
......
...@@ -142,6 +142,7 @@ jobs: ...@@ -142,6 +142,7 @@ jobs:
-DROCM_ENABLE_GH_ANNOTATIONS=On \ -DROCM_ENABLE_GH_ANNOTATIONS=On \
-DCLANG_TIDY_DEPEND_ON_TARGET=Off \ -DCLANG_TIDY_DEPEND_ON_TARGET=Off \
-DCLANG_TIDY_CACHE=/data/tidy-cache \ -DCLANG_TIDY_CACHE=/data/tidy-cache \
-DGPU_TARGETS=gfx908 \
.. ..
make -j$(nproc) -k onnx-proto tf-proto tidy make -j$(nproc) -k onnx-proto tf-proto tidy
...@@ -191,6 +192,7 @@ jobs: ...@@ -191,6 +192,7 @@ jobs:
-DCPPCHECK_BUILD_DIR=/data/cppcheck-cache \ -DCPPCHECK_BUILD_DIR=/data/cppcheck-cache \
-DBUILD_DEV=On \ -DBUILD_DEV=On \
-DROCM_ENABLE_GH_ANNOTATIONS=On \ -DROCM_ENABLE_GH_ANNOTATIONS=On \
-DGPU_TARGETS=gfx908 \
.. ..
make -j$(nproc) cppcheck make -j$(nproc) cppcheck
...@@ -227,6 +229,7 @@ jobs: ...@@ -227,6 +229,7 @@ jobs:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -279,6 +282,7 @@ jobs: ...@@ -279,6 +282,7 @@ jobs:
-DBUILD_DEV=On \ -DBUILD_DEV=On \
-DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/ccache \ -DCMAKE_CXX_COMPILER_LAUNCHER=/usr/local/bin/ccache \
-DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/ccache \ -DCMAKE_C_COMPILER_LAUNCHER=/usr/local/bin/ccache \
-DGPU_TARGETS=gfx908 \
.. ..
make -j$(nproc) tests driver make -j$(nproc) tests driver
...@@ -305,6 +309,7 @@ jobs: ...@@ -305,6 +309,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -335,6 +340,7 @@ jobs: ...@@ -335,6 +340,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -375,6 +381,7 @@ jobs: ...@@ -375,6 +381,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
...@@ -493,6 +500,7 @@ jobs: ...@@ -493,6 +500,7 @@ jobs:
steps: steps:
- name: Free space - name: Free space
uses: jlumbroso/free-disk-space@main uses: jlumbroso/free-disk-space@main
continue-on-error: true
with: with:
tool-cache: true tool-cache: true
android: true android: true
......
...@@ -12,7 +12,7 @@ on: ...@@ -12,7 +12,7 @@ on:
rocm_release: rocm_release:
description: ROCm Version description: ROCm Version
required: true required: true
default: '5.6' default: '5.7'
performance_reports_repo: performance_reports_repo:
description: Repository where performance reports are stored description: Repository where performance reports are stored
required: true required: true
...@@ -50,7 +50,7 @@ jobs: ...@@ -50,7 +50,7 @@ jobs:
release: release:
uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/perf-test.yml@main uses: ROCmSoftwarePlatform/migraphx-benchmark/.github/workflows/perf-test.yml@main
with: with:
rocm_release: ${{ github.event.inputs.rocm_release || '5.6' }} rocm_release: ${{ github.event.inputs.rocm_release || '5.7' }}
result_number: ${{ github.event.inputs.result_number || '10' }} result_number: ${{ github.event.inputs.result_number || '10' }}
flags: ${{ github.event.inputs.flags || '-r' }} flags: ${{ github.event.inputs.flags || '-r' }}
performance_reports_repo: ${{ github.event.inputs.performance_reports_repo || 'ROCmSoftwarePlatform/migraphx-reports' }} performance_reports_repo: ${{ github.event.inputs.performance_reports_repo || 'ROCmSoftwarePlatform/migraphx-reports' }}
......
...@@ -80,3 +80,6 @@ docs/html ...@@ -80,3 +80,6 @@ docs/html
cmake-build*/ cmake-build*/
build*/ build*/
# Recommended location to install rbuild dependencies from README.md
depend
...@@ -162,6 +162,8 @@ rocm_enable_clang_tidy( ...@@ -162,6 +162,8 @@ rocm_enable_clang_tidy(
-cppcoreguidelines-pro-type-vararg -cppcoreguidelines-pro-type-vararg
-cppcoreguidelines-special-member-functions -cppcoreguidelines-special-member-functions
-cppcoreguidelines-virtual-class-destructor -cppcoreguidelines-virtual-class-destructor
-cppcoreguidelines-avoid-capture-default-when-capturing-this
-cppcoreguidelines-rvalue-reference-param-not-moved
-google-readability-* -google-readability-*
-google-runtime-int -google-runtime-int
-google-runtime-references -google-runtime-references
......
...@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && ...@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl &&
curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.7/ focal main > /etc/apt/sources.list.d/rocm.list'
# From docs.amd.com for installing rocm. Needed to install properly # From docs.amd.com for installing rocm. Needed to install properly
RUN sh -c "echo 'Package: *\nPin: release o=repo.radeon.com\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600" RUN sh -c "echo 'Package: *\nPin: release o=repo.radeon.com\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600"
......
...@@ -109,11 +109,11 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build -> ...@@ -109,11 +109,11 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
stage('hipRTC Debug') { stage('hipRTC Debug') {
def sanitizers = "undefined" def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}" def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true) cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*')", gpu_debug: true)
} }
}, clang_release: rocmnode('cdna') { cmake_build -> }, clang_release: rocmnode('mi100+') { cmake_build ->
stage('Hip Clang Release') { stage('Hip Clang Release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release") cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*')")
stash includes: 'build/*.deb', name: 'migraphx-package' stash includes: 'build/*.deb', name: 'migraphx-package'
} }
// }, hidden_symbols: rocmnode('cdna') { cmake_build -> // }, hidden_symbols: rocmnode('cdna') { cmake_build ->
...@@ -122,7 +122,7 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build -> ...@@ -122,7 +122,7 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
// } // }
}, all_targets_debug : rocmnode('cdna') { cmake_build -> }, all_targets_debug : rocmnode('cdna') { cmake_build ->
stage('All targets Release') { stage('All targets Release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On") cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*')")
} }
}, mlir_debug: rocmnode('cdna') { cmake_build -> }, mlir_debug: rocmnode('cdna') { cmake_build ->
stage('MLIR Debug') { stage('MLIR Debug') {
...@@ -131,13 +131,13 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build -> ...@@ -131,13 +131,13 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
// Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS. // Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS.
def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}" def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}" def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'") cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*')")
} }
} }
}, ck_release: rocmnode('mi100+') { cmake_build -> }, ck_release: rocmnode('mi100+') { cmake_build ->
stage('CK Release') { stage('CK Release') {
withEnv(['MIGRAPHX_ENABLE_CK=1', 'MIGRAPHX_TUNE_CK=1']) { withEnv(['MIGRAPHX_ENABLE_CK=1', 'MIGRAPHX_TUNE_CK=1']) {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release") cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DGPU_TARGETS=$(/opt/rocm/bin/rocminfo | grep -o -m1 'gfx.*')")
} }
} }
}, clang_asan: rocmnode('nogpu') { cmake_build -> }, clang_asan: rocmnode('nogpu') { cmake_build ->
...@@ -163,13 +163,14 @@ def onnxnode(name, body) { ...@@ -163,13 +163,14 @@ def onnxnode(name, body) {
} }
} }
rocmtest onnx: onnxnode('cdna') { cmake_build -> rocmtest onnx: onnxnode('mi100+') { cmake_build ->
stage("Onnx runtime") { stage("Onnx runtime") {
sh ''' sh '''
apt install half apt install half
#ls -lR #ls -lR
md5sum ./build/*.deb md5sum ./build/*.deb
dpkg -i ./build/*.deb dpkg -i ./build/*.deb
env
cd /onnxruntime && ./build_and_test_onnxrt.sh cd /onnxruntime && ./build_and_test_onnxrt.sh
''' '''
} }
......
...@@ -21,7 +21,7 @@ charset-normalizer==3.1.0 ...@@ -21,7 +21,7 @@ charset-normalizer==3.1.0
# via requests # via requests
click==8.1.3 click==8.1.3
# via sphinx-external-toc # via sphinx-external-toc
cryptography==41.0.3 cryptography==41.0.4
# via pyjwt # via pyjwt
deprecated==1.2.13 deprecated==1.2.13
# via pygithub # via pygithub
...@@ -87,7 +87,7 @@ requests==2.28.2 ...@@ -87,7 +87,7 @@ requests==2.28.2
# via # via
# pygithub # pygithub
# sphinx # sphinx
rocm-docs-core>=0.20.0 rocm-docs-core==0.24.2
# via -r requirements.in # via -r requirements.in
smmap==5.0.0 smmap==5.0.0
# via gitdb # via gitdb
......
...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro ...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro
std::vector<float> results_vector(64); std::vector<float> results_vector(64);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify::verify_range(results_vector, sol)); EXPECT(migraphx::verify::verify_rms_range(results_vector, sol));
An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU. An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU.
By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target. By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target.
......
...@@ -50,9 +50,17 @@ Runs reference and CPU or GPU implementations and checks outputs for consistency ...@@ -50,9 +50,17 @@ Runs reference and CPU or GPU implementations and checks outputs for consistency
.. include:: ./driver/compile.rst .. include:: ./driver/compile.rst
.. option:: --tolerance [double] .. option:: --rms-tol [double]
Tolerance for errors (Default: 80) Tolerance for RMS error (Default: 0.001)
.. option:: --atol [double]
Tolerance for elementwise absolute difference (Default: 0.001)
.. option:: --rtol [double]
Tolerance for elementwise relative difference (Default: 0.001)
.. option:: -i, --per-instruction .. option:: -i, --per-instruction
......
...@@ -55,7 +55,9 @@ See below for a comprehensive list of commands and option arguments, as well as ...@@ -55,7 +55,9 @@ See below for a comprehensive list of commands and option arguments, as well as
| --exhaustive-tune | Enable exhaustive search to find fastest kernel | | --exhaustive-tune | Enable exhaustive search to find fastest kernel |
| --fp16 | Quantize for fp16 | | --fp16 | Quantize for fp16 |
| --int8 | Quantize for int8 | | --int8 | Quantize for int8 |
| --tolerance | Tolerance for errors | | --rms-tol | Tolerance for the RMS error (Default: 0.001) |
| --atol | Tolerance for elementwise absolute difference (Default: 0.001) |
| --rtol | Tolerance for elementwise relative difference (Default: 0.001) |
| --per-instruction \| -i | Verify each instruction | | --per-instruction \| -i | Verify each instruction |
| --reduce \| -r | Reduce program and verify | | --reduce \| -r | Reduce program and verify |
| --iterations \| -n | Number of iterations to run for perf report | | --iterations \| -n | Number of iterations to run for perf report |
......
...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local ...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386 RUN dpkg --add-architecture i386
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.7/ focal main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies # Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
......
...@@ -29,3 +29,12 @@ define = ...@@ -29,3 +29,12 @@ define =
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
MIGRAPHX_ENABLE_CPU=On MIGRAPHX_ENABLE_CPU=On
BUILD_DEV=On BUILD_DEV=On
[cibuild]
cxx = ${rocm_path}/llvm/bin/clang++
cc = ${rocm_path}/llvm/bin/clang
deps =
-f dev-requirements.txt
define =
CMAKE_C_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
CMAKE_CXX_COMPILER_LAUNCHER=${deps_dir}/bin/ccache
...@@ -197,6 +197,7 @@ register_migraphx_ops( ...@@ -197,6 +197,7 @@ register_migraphx_ops(
reduce_sum reduce_sum
relu relu
reshape reshape
reshape_lazy
reverse reverse
rnn rnn
rnn_last_cell_output rnn_last_cell_output
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp> #include <migraphx/make_op.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -536,13 +536,19 @@ struct params : command<params> ...@@ -536,13 +536,19 @@ struct params : command<params>
struct verify : command<verify> struct verify : command<verify>
{ {
compiler c; compiler c;
double tolerance = 80; migraphx::verify::tolerance tols;
bool per_instruction = false; bool per_instruction = false;
bool reduce = false; bool reduce = false;
void parse(argument_parser& ap) void parse(argument_parser& ap)
{ {
c.parse(ap); c.parse(ap);
ap(tolerance, {"--tolerance"}, ap.help("Tolerance for errors")); ap(tols.rms_tol, {"--rms-tol"}, ap.help("Tolerance for the RMS error (Default: 0.001)"));
ap(tols.atol,
{"--atol"},
ap.help("Tolerance for the elementwise absolute difference (Default: 0.001)"));
ap(tols.rtol,
{"--rtol"},
ap.help("Tolerance for the elementwise relative difference (Default: 0.001)"));
ap(per_instruction, ap(per_instruction,
{"-i", "--per-instruction"}, {"-i", "--per-instruction"},
ap.help("Verify each instruction"), ap.help("Verify each instruction"),
...@@ -567,15 +573,15 @@ struct verify : command<verify> ...@@ -567,15 +573,15 @@ struct verify : command<verify>
if(per_instruction) if(per_instruction)
{ {
verify_instructions(p, t, c.co, quantize, tolerance); verify_instructions(p, t, c.co, quantize, tols);
} }
else if(reduce) else if(reduce)
{ {
verify_reduced_program(p, t, c.co, quantize, m, tolerance); verify_reduced_program(p, t, c.co, quantize, m, tols);
} }
else else
{ {
verify_program(c.l.file, p, t, c.co, quantize, m, tolerance); verify_program(c.l.file, p, t, c.co, quantize, m, tols);
} }
} }
}; };
......
...@@ -77,24 +77,24 @@ void verify_program(const std::string& name, ...@@ -77,24 +77,24 @@ void verify_program(const std::string& name,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
auto x = run_ref(p, inputs); auto ref_outs = run_ref(p, inputs);
auto y = run_target(p, t, options, quantize, inputs); auto target_outs = run_target(p, t, options, quantize, inputs);
std::size_t output_num = x.size(); std::size_t output_num = ref_outs.size();
for(std::size_t i = 0; i < output_num; ++i) for(std::size_t i = 0; i < output_num; ++i)
{ {
if(x[i].get_shape().type() != y[i].get_shape().type() or if(ref_outs[i].get_shape().type() != target_outs[i].get_shape().type() or
x[i].get_shape().lens() != y[i].get_shape().lens()) ref_outs[i].get_shape().lens() != target_outs[i].get_shape().lens())
{ {
std::cout << "FAILED: " << name << std::endl; std::cout << "FAILED: " << name << std::endl;
std::cout << "Shape mismatch {" << x[i].get_shape() << "} != {" << y[i].get_shape() std::cout << "Shape mismatch {" << ref_outs[i].get_shape() << "} != {"
<< "}" << std::endl; << target_outs[i].get_shape() << "}" << std::endl;
} }
else else
{ {
verify_args(name, x[i], y[i], tolerance); verify_args(name, target_outs[i], verify::expected{ref_outs[i]}, tols);
} }
} }
} }
...@@ -103,7 +103,7 @@ void verify_instructions(const program& prog, ...@@ -103,7 +103,7 @@ void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize, precision quantize,
double tolerance) verify::tolerance tols)
{ {
const auto* mm_prog = prog.get_main_module(); const auto* mm_prog = prog.get_main_module();
for(auto&& ins : (*mm_prog)) for(auto&& ins : (*mm_prog))
...@@ -134,8 +134,7 @@ void verify_instructions(const program& prog, ...@@ -134,8 +134,7 @@ void verify_instructions(const program& prog,
{ {
std::cout << "Verify: " << ins.name() << std::endl; std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program( verify_program(ins.name(), p, t, options, quantize, create_param_map(p, false), tols);
ins.name(), p, t, options, quantize, create_param_map(p, false), tolerance);
} }
catch(...) catch(...)
{ {
...@@ -151,7 +150,7 @@ void verify_reduced(program p, ...@@ -151,7 +150,7 @@ void verify_reduced(program p,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
auto* mm = p.get_main_module(); auto* mm = p.get_main_module();
auto last = std::prev(mm->end(), n); auto last = std::prev(mm->end(), n);
...@@ -160,7 +159,7 @@ void verify_reduced(program p, ...@@ -160,7 +159,7 @@ void verify_reduced(program p,
std::cout << p << std::endl; std::cout << p << std::endl;
try try
{ {
verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance); verify_program(std::to_string(n), p, t, options, quantize, inputs, tols);
} }
catch(const std::exception& e) catch(const std::exception& e)
{ {
...@@ -174,7 +173,7 @@ void verify_reduced_program(const program& p, ...@@ -174,7 +173,7 @@ void verify_reduced_program(const program& p,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
const auto* mm = p.get_main_module(); const auto* mm = p.get_main_module();
auto n = std::distance(mm->begin(), mm->end()); auto n = std::distance(mm->begin(), mm->end());
...@@ -187,7 +186,7 @@ void verify_reduced_program(const program& p, ...@@ -187,7 +186,7 @@ void verify_reduced_program(const program& p,
std::cout << "Skip: " << i << std::endl; std::cout << "Skip: " << i << std::endl;
continue; continue;
} }
verify_reduced(p, i, t, options, quantize, inputs, tolerance); verify_reduced(p, i, t, options, quantize, inputs, tols);
} }
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "precision.hpp" #include "precision.hpp"
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/verify.hpp>
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
...@@ -37,18 +38,18 @@ void verify_program(const std::string& name, ...@@ -37,18 +38,18 @@ void verify_program(const std::string& name,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 100); verify::tolerance tols = verify::tolerance{});
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
double tolerance = 80); verify::tolerance tols = verify::tolerance{});
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 80); verify::tolerance tols = verify::tolerance{});
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace driver } // namespace driver
......
...@@ -81,6 +81,7 @@ struct MIGRAPHX_EXPORT instruction ...@@ -81,6 +81,7 @@ struct MIGRAPHX_EXPORT instruction
const std::vector<module_ref>& module_inputs() const; const std::vector<module_ref>& module_inputs() const;
/// Where this instruction is used as an input to another instruction
const std::vector<instruction_ref>& outputs() const; const std::vector<instruction_ref>& outputs() const;
friend bool operator==(const instruction& x, const instruction& y); friend bool operator==(const instruction& x, const instruction& y);
......
...@@ -49,17 +49,22 @@ struct allocate ...@@ -49,17 +49,22 @@ struct allocate
shape compute_shape(const std::vector<shape>& inputs) const shape compute_shape(const std::vector<shape>& inputs) const
{ {
migraphx::check_shapes{inputs, *this, true}.has(0, 1);
// check if shape attribute is not default
if(s != shape()) if(s != shape())
{ {
if(inputs.size() == 1)
{
migraphx::check_shapes{inputs, *this, false}.only_dims(1);
}
else
{
migraphx::check_shapes{inputs, *this, false}.has(0);
}
return s; return s;
} }
else else
{ {
migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
const auto& out_dims = inputs.at(0); const auto& out_dims = inputs.at(0);
assert(not out_dims.dynamic());
assert(out_dims.ndim() == 1);
std::size_t max_val = std::numeric_limits<std::size_t>::max(); std::size_t max_val = std::numeric_limits<std::size_t>::max();
std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0), std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
shape::dynamic_dimension{0, max_val}); shape::dynamic_dimension{0, max_val});
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment