Unverified Commit 3216fe52 authored by Umang Yadav's avatar Umang Yadav Committed by GitHub
Browse files

Make global workitems multiple of local workitems (#1976)

HIP requires global work items in multiple of local work items. If it is not it is not guaranteed to generate correct results all the time.
Fixes #1977
Fixes #1644
MIGraphX CI has moved to rocm-5.6 which doesn't require hipRTC workarounds
parent 04ae9b80
...@@ -323,6 +323,7 @@ jobs: ...@@ -323,6 +323,7 @@ jobs:
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: | run: |
set +x
gh extension install actions/gh-actions-cache --pin v1.0.1 gh extension install actions/gh-actions-cache --pin v1.0.1
gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm gh actions-cache delete ${{ steps.ccache_restore.outputs.cache-matched-key }} --confirm
...@@ -439,6 +440,7 @@ jobs: ...@@ -439,6 +440,7 @@ jobs:
env: env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: | run: |
set +x
gh extension install actions/gh-actions-cache gh extension install actions/gh-actions-cache
gh actions-cache delete ${{ steps.ccache_restore_fpga.outputs.cache-matched-key }} --confirm gh actions-cache delete ${{ steps.ccache_restore_fpga.outputs.cache-matched-key }} --confirm
continue-on-error: true continue-on-error: true
......
...@@ -15,13 +15,11 @@ def rocmtestnode(Map conf) { ...@@ -15,13 +15,11 @@ def rocmtestnode(Map conf) {
def compiler = bconf.get("compiler", "/opt/rocm/llvm/bin/clang++") def compiler = bconf.get("compiler", "/opt/rocm/llvm/bin/clang++")
def flags = bconf.get("flags", "") def flags = bconf.get("flags", "")
def gpu_debug = bconf.get("gpu_debug", "0") def gpu_debug = bconf.get("gpu_debug", "0")
def hiprtc_workarounds = bconf.get("hiprtc_workarounds", "0")
def cmd = """ def cmd = """
ulimit -c unlimited ulimit -c unlimited
echo "leak:dnnl::impl::malloc" > suppressions.txt echo "leak:dnnl::impl::malloc" > suppressions.txt
export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt" export LSAN_OPTIONS="suppressions=\$(pwd)/suppressions.txt"
export MIGRAPHX_GPU_DEBUG=${gpu_debug} export MIGRAPHX_GPU_DEBUG=${gpu_debug}
export MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS=${hiprtc_workarounds}
export CXX=${compiler} export CXX=${compiler}
export CXXFLAGS='-Werror' export CXXFLAGS='-Werror'
env env
...@@ -109,7 +107,7 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build -> ...@@ -109,7 +107,7 @@ rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
stage('hipRTC Debug') { stage('hipRTC Debug') {
def sanitizers = "undefined" def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}" def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true, hiprtc_workarounds: true) cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true)
} }
}, clang_release: rocmnode('cdna') { cmake_build -> }, clang_release: rocmnode('cdna') { cmake_build ->
stage('Hip Clang Release') { stage('Hip Clang Release') {
......
...@@ -135,14 +135,13 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over) ...@@ -135,14 +135,13 @@ compute_global_for(context& ctx, std::size_t n, std::size_t over)
std::size_t max_global = ctx.get_current_device().get_cu_count() * std::size_t max_global = ctx.get_current_device().get_cu_count() *
ctx.get_current_device().get_max_workitems_per_cu(); ctx.get_current_device().get_max_workitems_per_cu();
return [n, over, max_global](std::size_t local) { return [n, over, max_global](std::size_t local) {
std::size_t num_elements = n; // hip require global workitems multiple of local workitems. It may degrade performance.
// [TODO]: consider adding "fno-hip-uniform-block" flag when it becomes available.
// https://reviews.llvm.org/D155213
std::size_t num_elements = ((n + local - 1) / local) * local;
std::size_t groups = (num_elements + local - 1) / local; std::size_t groups = (num_elements + local - 1) / local;
std::size_t max_blocks = max_global / local; std::size_t max_blocks = max_global / local;
std::size_t nglobal = std::min(max_blocks * over, groups) * local; std::size_t nglobal = std::min(max_blocks * over, groups) * local;
#ifdef MIGRAPHX_USE_HIPRTC
if(enabled(MIGRAPHX_ENABLE_HIPRTC_WORKAROUNDS{}))
num_elements = ((num_elements + local - 1) / local) * local;
#endif
return std::min(nglobal, num_elements); return std::min(nglobal, num_elements);
}; };
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment