Commit 29820def authored by Paul's avatar Paul
Browse files

Merge

parents 6aa89319 be33669b
...@@ -8,6 +8,9 @@ on: ...@@ -8,6 +8,9 @@ on:
- master - master
- 'release/**' - 'release/**'
env:
DOCKER_USER: ${{secrets.DOCKERHUB_USERID}}
DOCKER_TOKEN: ${{secrets.DOCKERHUB_TOKEN}}
jobs: jobs:
cancel: cancel:
...@@ -17,22 +20,92 @@ jobs: ...@@ -17,22 +20,92 @@ jobs:
uses: styfle/cancel-workflow-action@0.11.0 uses: styfle/cancel-workflow-action@0.11.0
with: with:
access_token: ${{ github.token }} access_token: ${{ github.token }}
tidy:
check_image:
name: Check if image exists in registry
runs-on: ubuntu-latest
outputs:
imageexists: ${{ steps.check_image.outputs.imageexists }}
imagetag: ${{ steps.image_hash.outputs.imagetag }}
imageexists_sles: ${{ steps.check_image.outputs.imageexists_sles }}
imagetag_sles: ${{ steps.image_hash.outputs.imagetag_sles }}
steps:
- name: Checkout Code
uses: actions/checkout@v3
- name: Create Image Tag
id: image_hash
run: |
echo "imagetag=rocm/migraphx-private:hip-clang-${{hashFiles('**/hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', '**/rbuild.ini')}}" >> $GITHUB_OUTPUT
echo "imagetag_sles=rocm/migraphx-sles-private:hip-clang-${{hashFiles('**/tools/docker/sles.docker', '**/*requirements.txt', '**/install_prereqs.sh', '**/rbuild.ini')}}" >> $GITHUB_OUTPUT
- name: Check if image is built already
id: check_image
env:
DOCKERIMAGE: ${{ steps.image_hash.outputs.imagetag }}
DOCKERIMAGE_SLES: ${{ steps.image_hash.outputs.imagetag_sles }}
run: |
echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
if [[ "$(docker manifest inspect $DOCKERIMAGE 2> /dev/null)" != "" ]]; then
echo "imageexists=true" >> $GITHUB_OUTPUT
echo "Image already exists, skip building available"
else
echo "imageexists=false" >> $GITHUB_OUTPUT
echo "Tag does not exist, build and publishing required"
fi
if [[ "$(docker manifest inspect $DOCKERIMAGE_SLES 2> /dev/null)" != "" ]]; then
echo "imageexists_sles=true" >> $GITHUB_OUTPUT
echo "SLES Image already exists, skip building available"
else
echo "imageexists_sles=false" >> $GITHUB_OUTPUT
echo "SLES Tag does not exist, build and publishing required"
fi
build_image:
name: Build image
runs-on: ROCM-Ubuntu runs-on: ROCM-Ubuntu
needs: check_image
if: ${{ needs.check_image.outputs.imageexists != 'true' }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
# In this step, this action saves a list of existing images, - name: Build and publish
# the cache is created without them in the post run. env:
# It also restores the cache if it exists. DOCKERIMAGE: ${{ needs.check_image.outputs.imagetag }}
- name: Docker layer cache run: |
uses: jpribyl/action-docker-layer-caching@v0.1.1 echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
with: docker build . --file hip-clang.docker --tag $DOCKERIMAGE;
key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}} docker push $DOCKERIMAGE;
restore-keys:
docker-layer-caching-migraphx- build_SLES_image:
# Ignore the failure of a step and avoid terminating the job. name: Build SLES image
continue-on-error: true runs-on: ROCM-Ubuntu
needs: check_image
if: ${{ needs.check_image.outputs.imageexists_sles != 'true' }}
steps:
- uses: actions/checkout@v3
- name: Build and publish SLES
env:
DOCKERIMAGE_SLES: ${{ needs.check_image.outputs.imagetag_sles }}
run: |
echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
docker build . --file tools/docker/sles.docker --tag $DOCKERIMAGE_SLES;
docker push $DOCKERIMAGE_SLES;
tidy:
runs-on: ROCM-Ubuntu
needs: [ build_image, check_image ]
env:
DOCKERIMAGE: ${{ needs.check_image.outputs.imagetag }}
if: ${{ !cancelled() && (needs.build_image.result == 'success' || needs.build_image.result == 'skipped') }}
steps:
- uses: actions/checkout@v3
- name: Restore cache files for tidy - name: Restore cache files for tidy
uses: actions/cache/restore@v3 uses: actions/cache/restore@v3
...@@ -42,12 +115,12 @@ jobs: ...@@ -42,12 +115,12 @@ jobs:
key: tidy-cache-${{ github.ref }} key: tidy-cache-${{ github.ref }}
restore-keys: tidy-cache- restore-keys: tidy-cache-
- name: Build the Docker image - name: Docker Login
run: | run: |
docker build . --file hip-clang.docker --tag migraphx echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
- name: Clang tidy - name: Clang Tidy
shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx bash < {0}" shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKERIMAGE bash < {0}"
run: | run: |
mkdir build mkdir build
cd build cd build
...@@ -84,21 +157,14 @@ jobs: ...@@ -84,21 +157,14 @@ jobs:
cppcheck: cppcheck:
runs-on: ROCM-Ubuntu runs-on: ROCM-Ubuntu
needs: [ build_image, check_image ]
env:
DOCKERIMAGE: ${{ needs.check_image.outputs.imagetag }}
if: ${{ !cancelled() && (needs.build_image.result == 'success' || needs.build_image.result == 'skipped') }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
- name: Docker layer cache
uses: jpribyl/action-docker-layer-caching@v0.1.1
with:
key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
restore-keys:
docker-layer-caching-migraphx-
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
- name: Restore cache files for cppcheck - name: Restore cache files for cppcheck
id: cppcheck_restore id: cppcheck_restore
uses: actions/cache/restore@v3 uses: actions/cache/restore@v3
...@@ -107,11 +173,12 @@ jobs: ...@@ -107,11 +173,12 @@ jobs:
key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }} key: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-${{ github.ref }}
restore-keys: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}- restore-keys: cppcheck-cache-${{ hashFiles('cppcheck.rules', 'CMakeLists.txt') }}-
- name: Build the Docker image - name: Docker Login
run: docker build . --file hip-clang.docker --tag migraphx run: |
echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
- name: Cppcheck - name: Cppcheck
shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx bash < {0}" shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKERIMAGE bash < {0}"
run: | run: |
mkdir build mkdir build
cd build cd build
...@@ -142,29 +209,23 @@ jobs: ...@@ -142,29 +209,23 @@ jobs:
format: format:
runs-on: ROCM-Ubuntu runs-on: ubuntu-latest
needs: [ build_image, check_image ]
env:
DOCKERIMAGE: ${{ needs.check_image.outputs.imagetag }}
if: ${{ !cancelled() && (needs.build_image.result == 'success' || needs.build_image.result == 'skipped') }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with: with:
fetch-depth: 0 fetch-depth: 0
# In this step, this action saves a list of existing images, - name: Docker Login
# the cache is created without them in the post run. run: |
# It also restores the cache if it exists. echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
- name: Docker layer cache
uses: jpribyl/action-docker-layer-caching@v0.1.1
with:
key: docker-layer-caching-migraphx-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
restore-keys:
docker-layer-caching-migraphx-
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
- name: Build the Docker image
run: docker build . --file hip-clang.docker --tag migraphx
- name: Check formatting - name: Check formatting
shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx bash < {0}" shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKERIMAGE bash < {0}"
run: | run: |
set -e set -e
git config --global --add safe.directory /data git config --global --add safe.directory /data
...@@ -172,26 +233,16 @@ jobs: ...@@ -172,26 +233,16 @@ jobs:
sles: sles:
runs-on: ROCM-Ubuntu runs-on: ROCM-Ubuntu
needs: [ build_SLES_image, check_image ]
env:
DOCKERIMAGE_SLES: ${{ needs.check_image.outputs.imagetag_sles }}
if: ${{ !cancelled() && (needs.build_SLES_image.result == 'success' || needs.build_SLES_image.result == 'skipped') }}
steps: steps:
- uses: actions/checkout@v3 - uses: actions/checkout@v3
with: with:
fetch-depth: 0 fetch-depth: 0
# In this step, this action saves a list of existing images,
# the cache is created without them in the post run.
# It also restores the cache if it exists.
- name: Docker layer cache
uses: jpribyl/action-docker-layer-caching@v0.1.1
with:
key: docker-layer-caching-migraphx-sles-${{hashFiles('hip-clang.docker', '**/*requirements.txt', '**/install_prereqs.sh', 'rbuild.ini')}}
restore-keys:
docker-layer-caching-migraphx-sles-
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
- name: Build the Docker image
run: docker build . --file tools/docker/sles.docker --tag migraphx-sles
- name: Restore cache files for ccache - name: Restore cache files for ccache
uses: actions/cache/restore@v3 uses: actions/cache/restore@v3
id: ccache_restore id: ccache_restore
...@@ -200,8 +251,12 @@ jobs: ...@@ -200,8 +251,12 @@ jobs:
key: ccache-sles-${{ github.ref }} key: ccache-sles-${{ github.ref }}
restore-keys: ccache-sles- restore-keys: ccache-sles-
- name: Docker Login
run: |
echo $DOCKER_TOKEN | docker login -u $DOCKER_USER --password-stdin
- name: Build migraphx - name: Build migraphx
shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data migraphx-sles bash < {0}" shell: bash -c "docker run -i -v=$GITHUB_WORKSPACE:/data -w /data $DOCKERIMAGE_SLES bash < {0}"
run: | run: |
set -e set -e
export CCACHE_COMPRESSLEVEL=10 export CCACHE_COMPRESSLEVEL=10
......
...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro ...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro
std::vector<float> results_vector(64); std::vector<float> results_vector(64);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify::verify_range(results_vector, sol)); EXPECT(migraphx::verify::verify_rms_range(results_vector, sol));
An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU. An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU.
By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target. By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target.
......
...@@ -50,9 +50,17 @@ Runs reference and CPU or GPU implementations and checks outputs for consistency ...@@ -50,9 +50,17 @@ Runs reference and CPU or GPU implementations and checks outputs for consistency
.. include:: ./driver/compile.rst .. include:: ./driver/compile.rst
.. option:: --tolerance [double] .. option:: --rms-tol [double]
Tolerance for errors (Default: 80) Tolerance for RMS error (Default: 0.001)
.. option:: --atol [double]
Tolerance for elementwise absolute difference (Default: 0.001)
.. option:: --rtol [double]
Tolerance for elementwise relative difference (Default: 0.001)
.. option:: -i, --per-instruction .. option:: -i, --per-instruction
......
...@@ -55,7 +55,9 @@ See below for a comprehensive list of commands and option arguments, as well as ...@@ -55,7 +55,9 @@ See below for a comprehensive list of commands and option arguments, as well as
| --exhaustive-tune | Enable exhaustive search to find fastest kernel | | --exhaustive-tune | Enable exhaustive search to find fastest kernel |
| --fp16 | Quantize for fp16 | | --fp16 | Quantize for fp16 |
| --int8 | Quantize for int8 | | --int8 | Quantize for int8 |
| --tolerance | Tolerance for errors | | --rms-tol | Tolerance for the RMS error (Default: 0.001) |
| --atol | Tolerance for elementwise absolute difference (Default: 0.001) |
| --rtol | Tolerance for elementwise relative difference (Default: 0.001) |
| --per-instruction \| -i | Verify each instruction | | --per-instruction \| -i | Verify each instruction |
| --reduce \| -r | Reduce program and verify | | --reduce \| -r | Reduce program and verify |
| --iterations \| -n | Number of iterations to run for perf report | | --iterations \| -n | Number of iterations to run for perf report |
......
...@@ -142,6 +142,7 @@ register_migraphx_ops( ...@@ -142,6 +142,7 @@ register_migraphx_ops(
equal equal
erf erf
exp exp
fill
flatten flatten
floor floor
fmod fmod
......
...@@ -475,13 +475,15 @@ struct compiler ...@@ -475,13 +475,15 @@ struct compiler
{ {
if(is_offload_copy_set(p) and not co.offload_copy) if(is_offload_copy_set(p) and not co.offload_copy)
{ {
std::cout << "MIGraphX program was likely compiled with offload_copy set, Try " std::cout
<< "[WARNING]: MIGraphX program was likely compiled with offload_copy "
"set, Try "
"passing " "passing "
"`--enable-offload-copy` if program run fails.\n"; "`--enable-offload-copy` if program run fails.\n";
} }
else if(co.offload_copy) else if(co.offload_copy)
{ {
std::cout << "MIGraphX program was likely compiled without " std::cout << "[WARNING]: MIGraphX program was likely compiled without "
"offload_copy set, Try " "offload_copy set, Try "
"removing " "removing "
"`--enable-offload-copy` flag if passed to driver, if program run " "`--enable-offload-copy` flag if passed to driver, if program run "
...@@ -534,13 +536,19 @@ struct params : command<params> ...@@ -534,13 +536,19 @@ struct params : command<params>
struct verify : command<verify> struct verify : command<verify>
{ {
compiler c; compiler c;
double tolerance = 80; migraphx::verify::tolerance tols;
bool per_instruction = false; bool per_instruction = false;
bool reduce = false; bool reduce = false;
void parse(argument_parser& ap) void parse(argument_parser& ap)
{ {
c.parse(ap); c.parse(ap);
ap(tolerance, {"--tolerance"}, ap.help("Tolerance for errors")); ap(tols.rms_tol, {"--rms-tol"}, ap.help("Tolerance for the RMS error (Default: 0.001)"));
ap(tols.atol,
{"--atol"},
ap.help("Tolerance for the elementwise absolute difference (Default: 0.001)"));
ap(tols.rtol,
{"--rtol"},
ap.help("Tolerance for the elementwise relative difference (Default: 0.001)"));
ap(per_instruction, ap(per_instruction,
{"-i", "--per-instruction"}, {"-i", "--per-instruction"},
ap.help("Verify each instruction"), ap.help("Verify each instruction"),
...@@ -565,15 +573,15 @@ struct verify : command<verify> ...@@ -565,15 +573,15 @@ struct verify : command<verify>
if(per_instruction) if(per_instruction)
{ {
verify_instructions(p, t, c.co, quantize, tolerance); verify_instructions(p, t, c.co, quantize, tols);
} }
else if(reduce) else if(reduce)
{ {
verify_reduced_program(p, t, c.co, quantize, m, tolerance); verify_reduced_program(p, t, c.co, quantize, m, tols);
} }
else else
{ {
verify_program(c.l.file, p, t, c.co, quantize, m, tolerance); verify_program(c.l.file, p, t, c.co, quantize, m, tols);
} }
} }
}; };
......
...@@ -77,24 +77,24 @@ void verify_program(const std::string& name, ...@@ -77,24 +77,24 @@ void verify_program(const std::string& name,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
auto x = run_ref(p, inputs); auto ref_outs = run_ref(p, inputs);
auto y = run_target(p, t, options, quantize, inputs); auto target_outs = run_target(p, t, options, quantize, inputs);
std::size_t output_num = x.size(); std::size_t output_num = ref_outs.size();
for(std::size_t i = 0; i < output_num; ++i) for(std::size_t i = 0; i < output_num; ++i)
{ {
if(x[i].get_shape().type() != y[i].get_shape().type() or if(ref_outs[i].get_shape().type() != target_outs[i].get_shape().type() or
x[i].get_shape().lens() != y[i].get_shape().lens()) ref_outs[i].get_shape().lens() != target_outs[i].get_shape().lens())
{ {
std::cout << "FAILED: " << name << std::endl; std::cout << "FAILED: " << name << std::endl;
std::cout << "Shape mismatch {" << x[i].get_shape() << "} != {" << y[i].get_shape() std::cout << "Shape mismatch {" << ref_outs[i].get_shape() << "} != {" << target_outs[i].get_shape()
<< "}" << std::endl; << "}" << std::endl;
} }
else else
{ {
verify_args(name, x[i], y[i], tolerance); verify_args(name, target_outs[i], verify::expected{ref_outs[i]}, tols);
} }
} }
} }
...@@ -103,7 +103,7 @@ void verify_instructions(const program& prog, ...@@ -103,7 +103,7 @@ void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options, compile_options options,
precision quantize, precision quantize,
double tolerance) verify::tolerance tols)
{ {
const auto* mm_prog = prog.get_main_module(); const auto* mm_prog = prog.get_main_module();
for(auto&& ins : (*mm_prog)) for(auto&& ins : (*mm_prog))
...@@ -134,8 +134,7 @@ void verify_instructions(const program& prog, ...@@ -134,8 +134,7 @@ void verify_instructions(const program& prog,
{ {
std::cout << "Verify: " << ins.name() << std::endl; std::cout << "Verify: " << ins.name() << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
verify_program( verify_program(ins.name(), p, t, options, quantize, create_param_map(p, false), tols);
ins.name(), p, t, options, quantize, create_param_map(p, false), tolerance);
} }
catch(...) catch(...)
{ {
...@@ -151,13 +150,14 @@ void verify_reduced(program p, ...@@ -151,13 +150,14 @@ void verify_reduced(program p,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
auto* mm = p.get_main_module(); auto* mm = p.get_main_module();
auto last = std::prev(mm->end(), n); auto last = std::prev(mm->end(), n);
mm->remove_instructions(last, mm->end()); mm->remove_instructions(last, mm->end());
std::cout << "Verify: " << n << std::endl; std::cout << "Verify: " << n << std::endl;
std::cout << p << std::endl; std::cout << p << std::endl;
<<<<<<< HEAD
try try
{ {
verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance); verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
...@@ -167,6 +167,9 @@ void verify_reduced(program p, ...@@ -167,6 +167,9 @@ void verify_reduced(program p,
std::cout << "FAILED: " << n << std::endl; std::cout << "FAILED: " << n << std::endl;
std::cout << "Exception: " << e.what() << std::endl; std::cout << "Exception: " << e.what() << std::endl;
} }
=======
verify_program(std::to_string(n), p, t, options, quantize, inputs, tols);
>>>>>>> origin/thres_tole
} }
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
...@@ -174,13 +177,14 @@ void verify_reduced_program(const program& p, ...@@ -174,13 +177,14 @@ void verify_reduced_program(const program& p,
compile_options options, compile_options options,
precision quantize, precision quantize,
const parameter_map& inputs, const parameter_map& inputs,
double tolerance) verify::tolerance tols)
{ {
const auto* mm = p.get_main_module(); const auto* mm = p.get_main_module();
auto n = std::distance(mm->begin(), mm->end()); auto n = std::distance(mm->begin(), mm->end());
std::cout << "Verify steps: " << n << std::endl; std::cout << "Verify steps: " << n << std::endl;
for(std::size_t i = 1; i < n; i++) for(std::size_t i = 1; i < n; i++)
{ {
<<<<<<< HEAD
auto last = std::prev(mm->end(), i + 1); auto last = std::prev(mm->end(), i + 1);
if(contains({"@literal", "@param"}, last->name())) if(contains({"@literal", "@param"}, last->name()))
{ {
...@@ -188,6 +192,9 @@ void verify_reduced_program(const program& p, ...@@ -188,6 +192,9 @@ void verify_reduced_program(const program& p,
continue; continue;
} }
verify_reduced(p, i, t, options, quantize, inputs, tolerance); verify_reduced(p, i, t, options, quantize, inputs, tolerance);
=======
verify_reduced(p, i, t, options, quantize, inputs, tols);
>>>>>>> origin/thres_tole
} }
} }
......
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "precision.hpp" #include "precision.hpp"
#include <migraphx/program.hpp> #include <migraphx/program.hpp>
#include <migraphx/verify.hpp>
namespace migraphx { namespace migraphx {
namespace driver { namespace driver {
...@@ -37,18 +38,18 @@ void verify_program(const std::string& name, ...@@ -37,18 +38,18 @@ void verify_program(const std::string& name,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 100); verify::tolerance tols = verify::tolerance{});
void verify_instructions(const program& prog, void verify_instructions(const program& prog,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
double tolerance = 80); verify::tolerance tols = verify::tolerance{});
void verify_reduced_program(const program& p, void verify_reduced_program(const program& p,
const target& t, const target& t,
compile_options options = compile_options{}, compile_options options = compile_options{},
precision quantize = precision::fp32, precision quantize = precision::fp32,
const parameter_map& inputs = {}, const parameter_map& inputs = {},
double tolerance = 80); verify::tolerance tols = verify::tolerance{});
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace driver } // namespace driver
......
...@@ -153,7 +153,7 @@ struct check_shapes ...@@ -153,7 +153,7 @@ struct check_shapes
{ {
if(begin != end) if(begin != end)
{ {
if(begin->max_lens().size() != n) if(begin->ndim() != n)
MIGRAPHX_THROW(prefix() + "Only " + std::to_string(n) + "d supported"); MIGRAPHX_THROW(prefix() + "Only " + std::to_string(n) + "d supported");
} }
return *this; return *this;
...@@ -168,7 +168,7 @@ struct check_shapes ...@@ -168,7 +168,7 @@ struct check_shapes
{ {
if(begin != end) if(begin != end)
{ {
if(begin->max_lens().size() > n) if(begin->ndim() > n)
MIGRAPHX_THROW(prefix() + "Shape must have at most " + std::to_string(n) + MIGRAPHX_THROW(prefix() + "Shape must have at most " + std::to_string(n) +
" dimensions"); " dimensions");
} }
...@@ -184,7 +184,7 @@ struct check_shapes ...@@ -184,7 +184,7 @@ struct check_shapes
{ {
if(begin != end) if(begin != end)
{ {
if(begin->max_lens().size() < n) if(begin->ndim() < n)
MIGRAPHX_THROW(prefix() + "Shape must have at least " + std::to_string(n) + MIGRAPHX_THROW(prefix() + "Shape must have at least " + std::to_string(n) +
" dimensions"); " dimensions");
} }
...@@ -254,6 +254,16 @@ struct check_shapes ...@@ -254,6 +254,16 @@ struct check_shapes
return *this; return *this;
} }
/*!
* Check all shapes are scalar.
*/
const check_shapes& scalar() const
{
if(not this->all_of([](const shape& s) { return s.scalar(); }))
MIGRAPHX_THROW(prefix() + "Shapes are not a scalar");
return *this;
}
/*! /*!
* Check all shapes are standard or scalar. * Check all shapes are standard or scalar.
*/ */
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_OPERATORS_FILL_HPP
#define MIGRAPHX_GUARD_OPERATORS_FILL_HPP
#include <migraphx/check_shapes.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/par_for.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace op {
/**
* fill(default_value, output_buffer)
* Fill an output buffer with the given default_value.
* Note that if the default_value is a literal and the output_buffer
* has a static shape this operator can be replaced with a literal.
*/
struct fill
{
std::string name() const { return "fill"; }
shape compute_shape(std::vector<shape> inputs) const
{
check_shapes{inputs, *this, true}.has(2).same_type();
if(inputs.at(0).dynamic() or inputs.at(0).elements() != 1)
{
MIGRAPHX_THROW("FILL: default_value is dynamic or more than one element");
}
return inputs.back();
}
argument compute(const dyn_output& dyn_out, std::vector<argument> args) const
{
visit_all(args[0], args[1])([&](auto value, auto output) {
par_for(dyn_out.computed_shape.elements(), [&](auto i) { output[i] = value.front(); });
});
return args[1];
}
std::ptrdiff_t output_alias(const std::vector<shape>&) const { return 1; }
};
} // namespace op
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
...@@ -55,6 +55,7 @@ ...@@ -55,6 +55,7 @@
#include <migraphx/op/equal.hpp> #include <migraphx/op/equal.hpp>
#include <migraphx/op/erf.hpp> #include <migraphx/op/erf.hpp>
#include <migraphx/op/exp.hpp> #include <migraphx/op/exp.hpp>
#include <migraphx/op/fill.hpp>
#include <migraphx/op/flatten.hpp> #include <migraphx/op/flatten.hpp>
#include <migraphx/op/floor.hpp> #include <migraphx/op/floor.hpp>
#include <migraphx/op/fmod.hpp> #include <migraphx/op/fmod.hpp>
......
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
#include <functional> #include <functional>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <assert.h>
#include <migraphx/float_equal.hpp> #include <migraphx/float_equal.hpp>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
...@@ -187,16 +188,99 @@ double rms_range(const R1& r1, const R2& r2) ...@@ -187,16 +188,99 @@ double rms_range(const R1& r1, const R2& r2)
return std::numeric_limits<range_value<R1>>::max(); return std::numeric_limits<range_value<R1>>::max();
} }
template <class R>
double get_rms_tol(const R&, std::size_t tolerance = 80)
{
double threshold = std::numeric_limits<range_value<R>>::epsilon() * tolerance;
return threshold;
}
/*
C++ doesn't support named arguments, this is just wrapper that helps distinguish between actual
results v/s expected results arguments.
*/
template <class T>
struct expected
{
expected() = default;
explicit expected(const T& input) : x(&input) {}
const T& data() const
{
assert(x != nullptr);
return *x;
}
private:
const T* x = nullptr;
};
// deduction guide for templated expected class
template <class T>
expected(const T&) -> expected<T>;
struct tolerance
{
double rms_tol = 0.001;
double atol = 0.001;
double rtol = 0.001;
};
/*
MIGraphX implementation of numpy's np.allclose() which checks if elementwise absolute diff is within
tolerance using this formula: abs(a - b) < atol + rtol(abs(b))
*/
template <class R1, class R2> template <class R1, class R2>
bool verify_range(const R1& r1, const R2& r2, double tolerance = 80, double* out_error = nullptr) bool allclose(const R1& r1, const R2& r2, tolerance tols)
{ {
double threshold = std::numeric_limits<range_value<R1>>::epsilon() * tolerance; std::size_t n = range_distance(r1);
if(n == range_distance(r2))
{
auto idx = mismatch_idx(r1, r2, [&](auto x, auto y) {
return abs_diff(double(x), double(y)) > tols.atol + tols.rtol * std::abs(double(y));
});
return idx >= range_distance(r1);
}
return false;
}
template <class R1, class R2>
bool verify_rms_range(const R1& r1,
const R2& r2,
std::size_t tolerance = 80,
double* out_rms_error = nullptr)
{
double threshold = get_rms_tol(r1, tolerance);
auto error = rms_range(r1, r2); auto error = rms_range(r1, r2);
if(out_error != nullptr) if(out_rms_error != nullptr)
*out_error = error; *out_rms_error = error;
return error <= threshold; return error <= threshold;
} }
template <class R1, class R2>
bool verify_range_with_tolerance(const R1& r1,
const expected<R2>& r2,
tolerance tols = tolerance{},
double* out_rms_error = nullptr)
{
auto rms_error = rms_range(r1, r2.data());
// disable ewise_verify for now, it requires lot of tests to be fixed
// auto ewise_verify = allclose(r1, r2.data(), tols);
if(out_rms_error != nullptr)
*out_rms_error = rms_error;
return rms_error <= tols.rms_tol;
}
// expected argument should be passed as second, but if it is passed as the first by mistake then
// flip the order
template <class R1, class R2>
bool verify_range_with_tolerance(const expected<R1>& r1,
const R2& r2,
tolerance tols = tolerance{},
double* out_rms_error = nullptr)
{
return verify_rms_range(r2, r1, tols, out_rms_error);
}
} // namespace verify } // namespace verify
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -31,11 +31,15 @@ ...@@ -31,11 +31,15 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_EXPORT MIGRAPHX_EXPORT bool verify_args(const std::string& name,
bool verify_args(const std::string& name,
const argument& ref_arg,
const argument& target_arg, const argument& target_arg,
double tolerance = 80); const verify::expected<argument>& ref_arg,
verify::tolerance);
MIGRAPHX_EXPORT bool verify_args_with_tolerance(const std::string& name,
const argument& target_arg,
const verify::expected<argument>& ref_arg,
std::size_t tolerance = 80);
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include <migraphx/instruction.hpp>
#include <migraphx/load_save.hpp> #include <migraphx/load_save.hpp>
#include <migraphx/file_buffer.hpp> #include <migraphx/file_buffer.hpp>
#include <migraphx/json.hpp> #include <migraphx/json.hpp>
...@@ -60,9 +61,29 @@ void save(const program& p, const std::string& filename, const file_options& opt ...@@ -60,9 +61,29 @@ void save(const program& p, const std::string& filename, const file_options& opt
{ {
write_buffer(filename, save_buffer(p, options)); write_buffer(filename, save_buffer(p, options));
} }
// MIOpen doesn't support serializing fusion plans with Find-2.0 APIs
void print_miopen_warning(const program& p)
{
auto mods = p.get_modules();
if(std::any_of(mods.begin(), mods.end(), [](const auto* m) {
return std::any_of(m->begin(), m->end(), [](const instruction& i) {
return i.name() == "gpu::miopen_fusion";
});
}))
{
std::cout << "[WARNING]: Program has miopen_fusion instructions for which tuned solutions "
"are not stored inside serialized MIGraphX program. Consider serializing with "
"MIGRAPHX_DISABLE_MIOPEN_FUSION=1 flag set."
<< std::endl;
;
}
}
std::vector<char> save_buffer(const program& p, const file_options& options) std::vector<char> save_buffer(const program& p, const file_options& options)
{ {
value v = p.to_value(); value v = p.to_value();
print_miopen_warning(p);
std::vector<char> buffer; std::vector<char> buffer;
if(options.format == "msgpack") if(options.format == "msgpack")
{ {
......
...@@ -1327,46 +1327,57 @@ struct find_split_reshape ...@@ -1327,46 +1327,57 @@ struct find_split_reshape
{ {
auto slc = r.instructions["slice"]; auto slc = r.instructions["slice"];
auto rsp = r.instructions["reshape"]; auto rsp = r.instructions["reshape"];
auto input = slc->inputs().front(); auto input = slc->inputs().front();
// Only apply simplification when slices are on a single axis
auto axes = any_cast<op::slice>(slc->get_operator()).axes;
if(axes.size() > 1)
{
return;
}
auto split_outputs = get_splits(input); auto split_outputs = get_splits(input);
if(split_outputs.empty()) if(split_outputs.empty())
{ {
return; return;
} }
// Only want to apply this optimization if each split output is followed by // Find all the reshapes (similar to rsp) that can be simplified
// a contiguous op and a reshape std::vector<instruction_ref> conts;
if(std::any_of(split_outputs.begin(), split_outputs.end(), [](auto i) { std::vector<instruction_ref> vec_rsp;
if(i->outputs().size() == 1)
// Iterate through slice and contiguous outputs to allow simplifications when
// slice is followed by multiple reshapes
for(auto& i : split_outputs)
{ {
auto cont = i->outputs().front(); std::copy_if(i->outputs().begin(),
return cont->outputs().size() != 1; i->outputs().end(),
std::back_inserter(conts),
[](auto j) { return j->name() == "contiguous"; });
} }
return false;
})) for(auto& i : conts)
{ {
return; std::copy_if(i->outputs().begin(),
i->outputs().end(),
std::back_inserter(vec_rsp),
[&](auto j) { return j->get_operator() == rsp->get_operator(); });
} }
std::vector<instruction_ref> vec_rsp(split_outputs.size()); // No simplification needed if there is only one slice -> cont -> reshape
std::transform(split_outputs.begin(), split_outputs.end(), vec_rsp.begin(), [](auto i) { if(vec_rsp.size() <= 1)
auto cont = i->outputs().front();
return cont->outputs().front();
});
// all outputs are reshape and of the same shape
auto dims = any_cast<op::reshape>(rsp->get_operator()).dims;
if(not same_ops(vec_rsp))
{ {
return; return;
} }
// ensure reshape happens after the axis dimension // ensure reshape happens after the axis dimension
auto axis = any_cast<op::slice>(slc->get_operator()).axes[0]; auto axis = axes[0];
auto slc_lens = slc->get_shape().lens(); auto slc_lens = slc->get_shape().lens();
auto slc_dim_size = std::accumulate( auto slc_dim_size = std::accumulate(
slc_lens.begin() + axis, slc_lens.end(), 1, std::multiplies<std::size_t>()); slc_lens.begin() + axis, slc_lens.end(), 1, std::multiplies<std::size_t>());
auto input_lens = input->get_shape().lens();
auto input_size = input->get_shape().elements();
auto slc_axis_len = input_lens[axis];
// search the reshape output (standard shape) to decide which axis are // search the reshape output (standard shape) to decide which axis are
// in its output corresponding to the slc_dim_size // in its output corresponding to the slc_dim_size
...@@ -1393,16 +1404,67 @@ struct find_split_reshape ...@@ -1393,16 +1404,67 @@ struct find_split_reshape
{ {
rsp_axis = std::distance(rsp_strides.begin(), ait); rsp_axis = std::distance(rsp_strides.begin(), ait);
} }
// calculate reshape output shape
std::vector<int64_t> vec_dims(vec_rsp.size());
std::transform(vec_rsp.begin(), vec_rsp.end(), vec_dims.begin(), [&](auto is) { // Calculate reshape output shape
return is->get_shape().lens()[rsp_axis]; // Need to find a reshape such that data represented by instructions in vec_rsp can be
}); // written as slices of this new reshape. This is done by holding all the dims constant in
// rsp_lens to compute the required dim for rsp_axis (axis that will be sliced)
// ex 1: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 2, 2, 4}, {2, 2, 2, 4}, {2, 2, 2, 4}
// rsp_axis = 1, rsp_out_lens (initial) = {2, 1, 2, 4}, rsp_fixed_size = 2*1*2*4 = 16
// rsp_axis_len = 2*12*4 / 16 = 6
// rsp_out_lens (final) = {2, 6, 2, 4}
// ex 2: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 16}, {2, 16}, {2, 16}
// rsp_axis = 1, rsp_out_lens (initial) = {2, 1}, rsp_fixed_size = 2*1 = 2
// rsp_axis_len = 2*12*4 / 2 = 48
// rsp_out_lens (final) = {2, 48}
std::vector<int64_t> rsp_out_lens(rsp_lens.begin(), rsp_lens.end()); std::vector<int64_t> rsp_out_lens(rsp_lens.begin(), rsp_lens.end());
rsp_out_lens[rsp_axis] = 1;
auto rsp_fixed_size = std::accumulate(
rsp_out_lens.begin(), rsp_out_lens.end(), 1, std::multiplies<std::size_t>());
rsp_out_lens[rsp_axis] = std::accumulate(vec_dims.begin(), vec_dims.end(), std::int64_t{0}); // cannot create a valid reshape for simplification
if(input_size % rsp_fixed_size != 0)
{
return;
}
auto rsp_axis_len = input_size / rsp_fixed_size;
rsp_out_lens[rsp_axis] = rsp_axis_len;
// Calculate new slice start and end indices. Indices are scaled using the new reshape axis
// and the original slice axis. See examples:
// ex 1: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 2, 2, 4}, {2, 2, 2, 4}, {2, 2, 2, 4}
// slc_axis_len = 12, rsp_axis_len = 6
// New Starts: {0*6/12, 4*6/12, 8*6/12} = {0, 2, 4}
// New Ends: {4*6/12, 8*6/12, 12*6/12} = {2, 4, 6}
// ex 2: Input Shape: {2, 12, 4}, Slice Axis: 1, Slices are: (0:4), (4:8), (8:12),
// Reshape Outputs: {2, 16}, {2, 16}, {2, 16}
// slc_axis_len = 12, rsp_axis_len = 48
// New Starts: {0*48/12, 4*48/12, 8*48/12} = { 0, 16, 32}
// New Ends: {4*48/12, 8*48/12, 12*48/12} = {16, 32, 48}
std::vector<int64_t> new_starts(vec_rsp.size());
std::transform(vec_rsp.begin(), vec_rsp.end(), new_starts.begin(), [&](auto is) {
auto cont = is->inputs().front();
auto og_slc = cont->inputs().front();
return any_cast<op::slice>(og_slc->get_operator()).starts[0] * rsp_axis_len /
slc_axis_len;
});
std::vector<int64_t> new_ends(vec_rsp.size());
std::transform(vec_rsp.begin(), vec_rsp.end(), new_ends.begin(), [&](auto is) {
auto cont = is->inputs().front();
auto og_slc = cont->inputs().front();
return any_cast<op::slice>(og_slc->get_operator()).ends[0] * rsp_axis_len /
slc_axis_len;
});
// insert the reshape instruction and add contiguous if needed // insert the reshape instruction and add contiguous if needed
if(not input->get_shape().standard()) if(not input->get_shape().standard())
...@@ -1413,16 +1475,14 @@ struct find_split_reshape ...@@ -1413,16 +1475,14 @@ struct find_split_reshape
std::next(input), make_op("reshape", {{"dims", rsp_out_lens}}), input); std::next(input), make_op("reshape", {{"dims", rsp_out_lens}}), input);
// replace the original reshape with slice // replace the original reshape with slice
int64_t start = 0;
for(std::size_t i = 0; i < vec_rsp.size(); ++i) for(std::size_t i = 0; i < vec_rsp.size(); ++i)
{ {
m.replace_instruction( m.replace_instruction(
vec_rsp[i], vec_rsp[i],
make_op( make_op(
"slice", "slice",
{{"axes", {rsp_axis}}, {"starts", {start}}, {"ends", {start + vec_dims[i]}}}), {{"axes", {rsp_axis}}, {"starts", {new_starts[i]}}, {"ends", {new_ends[i]}}}),
rsp_ins); rsp_ins);
start += vec_dims[i];
} }
} }
}; };
......
...@@ -50,6 +50,7 @@ file(GLOB KERNEL_FILES CONFIGURE_DEPENDS ...@@ -50,6 +50,7 @@ file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}") message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
add_embed_library(migraphx_kernels ${KERNEL_FILES} RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/) add_embed_library(migraphx_kernels ${KERNEL_FILES} RELATIVE ${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/)
configure_file(device/targets.hpp.in include/migraphx/gpu/device/targets.hpp)
file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp) file(GLOB DEVICE_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/device/*.cpp)
add_library(migraphx_device ${DEVICE_GPU_SRCS}) add_library(migraphx_device ${DEVICE_GPU_SRCS})
...@@ -69,6 +70,7 @@ rocm_clang_tidy_check(migraphx_device) ...@@ -69,6 +70,7 @@ rocm_clang_tidy_check(migraphx_device)
target_link_libraries(migraphx_device PUBLIC migraphx) target_link_libraries(migraphx_device PUBLIC migraphx)
target_link_libraries(migraphx_device PRIVATE compile_for_gpu) target_link_libraries(migraphx_device PRIVATE compile_for_gpu)
target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>) target_include_directories(migraphx_device PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_BINAR_DIR}/include>)
target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>) target_include_directories(migraphx_device PRIVATE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/device/include>)
target_compile_options(migraphx_device PRIVATE -Wno-ignored-attributes) target_compile_options(migraphx_device PRIVATE -Wno-ignored-attributes)
migraphx_generate_export_header(migraphx_device DIRECTORY migraphx/gpu/device) migraphx_generate_export_header(migraphx_device DIRECTORY migraphx/gpu/device)
......
...@@ -26,7 +26,9 @@ ...@@ -26,7 +26,9 @@
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#include <migraphx/config.hpp> #include <migraphx/config.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/gpu/device/types.hpp> #include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/targets.hpp>
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
...@@ -84,8 +86,15 @@ inline auto launch(hipStream_t stream, index_int global, index_int local) ...@@ -84,8 +86,15 @@ inline auto launch(hipStream_t stream, index_int global, index_int local)
hipError_t kernel_launch_status = hipGetLastError(); hipError_t kernel_launch_status = hipGetLastError();
if(kernel_launch_status != hipSuccess) if(kernel_launch_status != hipSuccess)
{ {
MIGRAPHX_THROW("MIGraphX device kernel failed to launch with error: " + std::string message = hipGetErrorString(kernel_launch_status);
std::string(hipGetErrorString(kernel_launch_status))); if(not contains(get_targets(), get_device_name()))
{
message += ". Trying to run a kernel for " + get_device_name() +
" but MIGraphX was built for targets " + get_targets_as_string() +
". Please rebuild MIGraphX with -DGPU_TARGETS='" + get_device_name() +
"'.";
}
MIGRAPHX_THROW("MIGraphX device kernel failed to launch with error: " + message);
} }
}; };
} }
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <migraphx/gpu/device/targets.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/errors.hpp>
#include <hip/hip_runtime_api.h>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
static std::vector<std::string> parse_targets() { return split_string(MIGRAPHX_GPU_TARGETS, ';'); }
const std::vector<std::string>& get_targets()
{
static auto result = parse_targets();
return result;
}
std::string get_targets_as_string() { return join_strings(get_targets(), ", "); }
static int get_device_id()
{
int device;
auto status = hipGetDevice(&device);
if(status != hipSuccess)
MIGRAPHX_THROW("No device");
return device;
}
std::string get_device_name()
{
hipDeviceProp_t props{};
auto status = hipGetDeviceProperties(&props, get_device_id());
if(status != hipSuccess)
MIGRAPHX_THROW("Failed to get device properties");
return props.gcnArchName;
}
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_DEVICE_TARGETS_CPP
#define MIGRAPHX_GUARD_DEVICE_TARGETS_CPP
#include <migraphx/config.hpp>
#include <string>
#include <vector>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {
#define MIGRAPHX_GPU_TARGETS "@GPU_TARGETS@" // NOLINT
const std::vector<std::string>& get_targets();
std::string get_targets_as_string();
std::string get_device_name();
} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif // MIGRAPHX_GUARD_DEVICE_TARGETS_CPP
...@@ -103,7 +103,10 @@ struct mlir_op ...@@ -103,7 +103,10 @@ struct mlir_op
} }
if(ins->name() == "@return") if(ins->name() == "@return")
{ {
return ins_shapes[ins->inputs().at(0)].with_type(type); auto s = ins_shapes[ins->inputs().at(0)].with_type(type);
if(not s.standard())
MIGRAPHX_THROW("MLIR doesnt support non-standard output");
return s;
} }
std::vector<shape> input_shapes; std::vector<shape> input_shapes;
input_shapes.resize(ins->inputs().size()); input_shapes.resize(ins->inputs().size());
...@@ -299,10 +302,8 @@ struct find_mlir_fused_ops ...@@ -299,10 +302,8 @@ struct find_mlir_fused_ops
} }
}; };
struct find_mlir_standalone_convolution_op struct find_mlir_standalone_op
{ {
auto matcher() const { return match::name("convolution"); }
void apply(module_pass_manager& mpm, const match::matcher_result& r) const void apply(module_pass_manager& mpm, const match::matcher_result& r) const
{ {
auto conv_based_op = r.result; auto conv_based_op = r.result;
...@@ -324,6 +325,16 @@ struct find_mlir_standalone_convolution_op ...@@ -324,6 +325,16 @@ struct find_mlir_standalone_convolution_op
} }
}; };
struct find_mlir_standalone_convolution_op : find_mlir_standalone_op
{
auto matcher() const { return match::name("convolution"); }
};
struct find_mlir_standalone_dot_op : find_mlir_standalone_op
{
auto matcher() const { return match::name("dot"); }
};
/** /**
* @brief Declares a new MIGraphX environment variable which forces to generate * @brief Declares a new MIGraphX environment variable which forces to generate
* only specific MLIR operations. * only specific MLIR operations.
...@@ -331,7 +342,7 @@ struct find_mlir_standalone_convolution_op ...@@ -331,7 +342,7 @@ struct find_mlir_standalone_convolution_op
* The variable, if defined, forces MIGraphX to use only specific operations * The variable, if defined, forces MIGraphX to use only specific operations
* with MLIR regardless of the underlying GPU architecture. The variable accepts * with MLIR regardless of the underlying GPU architecture. The variable accepts
* a list of operations separated by comma. The variable recognizes the following * a list of operations separated by comma. The variable recognizes the following
* operations: "fused", "convolution". If the variable is not defined MIGraphX * operations: "fused", "convolution", "dot". If the variable is not defined MIGraphX
* will decide by itself which operations to delegate to MLIR. The variable is * will decide by itself which operations to delegate to MLIR. The variable is
* intended to be primarily used by rocMLIR developers. * intended to be primarily used by rocMLIR developers.
*/ */
...@@ -346,18 +357,15 @@ bool is_requested(std::string_view option) ...@@ -346,18 +357,15 @@ bool is_requested(std::string_view option)
return contains(options, option); return contains(options, option);
} }
bool is_fusion_enabled() bool is_enabled(std::string_view op_name, context* ctx)
{ {
if(is_self_decide()) if(is_self_decide())
{
if(op_name == "fused")
{ {
return true; return true;
} }
return is_requested("fused"); else if(op_name == "convolution")
}
bool is_standalone_convs_enabled(context* ctx)
{
if(is_self_decide())
{ {
if(ctx == nullptr) if(ctx == nullptr)
{ {
...@@ -370,7 +378,12 @@ bool is_standalone_convs_enabled(context* ctx) ...@@ -370,7 +378,12 @@ bool is_standalone_convs_enabled(context* ctx)
return starts_with(device.get_gfx_name(), navi_family); return starts_with(device.get_gfx_name(), navi_family);
} }
} }
return is_requested("convolution"); else
{
return false;
}
}
return is_requested(op_name);
} }
} // namespace } // namespace
...@@ -379,21 +392,25 @@ bool is_standalone_convs_enabled(context* ctx) ...@@ -379,21 +392,25 @@ bool is_standalone_convs_enabled(context* ctx)
void fuse_mlir::apply(module_pass_manager& mpm) const void fuse_mlir::apply(module_pass_manager& mpm) const
{ {
#ifdef MIGRAPHX_MLIR #ifdef MIGRAPHX_MLIR
if(is_fusion_enabled()) if(is_enabled("fused", this->ctx))
{ {
match::find_matches(mpm, find_mlir_fused_ops{}); match::find_matches(mpm, find_mlir_fused_ops{});
} }
if(is_standalone_convs_enabled(this->ctx)) if(is_enabled("convolution", this->ctx))
{ {
match::find_matches(mpm, find_mlir_standalone_convolution_op{}); match::find_matches(mpm, find_mlir_standalone_convolution_op{});
} }
if(is_enabled("dot", this->ctx))
{
match::find_matches(mpm, find_mlir_standalone_dot_op{});
}
#else #else
(void)mpm; (void)mpm;
#endif #endif
} }
} // namespace gpu } // namespace gpu
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment