Commit 5db68230 authored by Jing Zhang
Browse files

Merge branch 'navi4x_wmma' of github.com:ROCm/composable_kernel-internal into navi4x_wmma

parents 14b422d7 2052dfc9
......@@ -416,7 +416,7 @@ struct BlockwiseGemmWMMA
static constexpr auto a_thread_desc_ = make_naive_tensor_descriptor(
make_tuple(Number<KPack / A_K1 / A_KRow>{}, Number<MRepeat>{}, I1, I1, I1, Number<A_K1>{}),
make_tuple(Number<A_K1>{},
Number<KPack / A_KRow>{},
Number<A_KRow * A_K1>{},
Number<A_K1>{},
Number<A_K1>{},
Number<A_K1>{},
......@@ -425,7 +425,7 @@ struct BlockwiseGemmWMMA
static constexpr auto b_thread_desc_ = make_naive_tensor_descriptor(
make_tuple(Number<KPack / B_K1 / B_KRow>{}, Number<NRepeat>{}, I1, I1, I1, Number<B_K1>{}),
make_tuple(Number<B_K1>{},
Number<KPack / B_KRow>{},
Number<B_KRow * B_K1>{},
Number<B_K1>{},
Number<B_K1>{},
Number<B_K1>{},
......
......@@ -135,7 +135,7 @@ struct GridwiseGemm_Wmma
static constexpr auto MWaves = MPerBlock / (MRepeat * MPerWmma);
static constexpr auto NWaves = NPerBlock / (NRepeat * NPerWmma);
static constexpr auto WmmaK = K1 == 16 ? 32 : 16;
static constexpr auto WmmaK = (K1 == 16) ? 32 : 16;
using ThisThreadBlock = ThisThreadBlock<BlockSize>;
......
......@@ -11,7 +11,7 @@ cmake
-D CMAKE_CXX_FLAGS="-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \
-D CMAKE_BUILD_TYPE=Release \
-D BUILD_DEV=ON \
-D GPU_TARGETS="gfx908;gfx90a;gfx940" \
-D GPU_TARGETS="gfx1200" \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-D USE_BITINT_EXTENSION_INT4=OFF \
${MY_PROJECT_SOURCE}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment