"driver/include/host_tensor.hpp" did not exist on "acd7082fe109aa4228dfca652e87cab96bc6837f"
Commit 05830053 authored by aska-0096's avatar aska-0096
Browse files

Bug found: caused by the intra-row permute being turned off

parent dc8309db
...@@ -60,7 +60,7 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle ...@@ -60,7 +60,7 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
true, true,
4, // C shuffle (M Repeat) Per store 4, // C shuffle (M Repeat) Per store
1, // C shuffle (N Repeat) Per store 1, // C shuffle (N Repeat) Per store
S<1, 32, 1, 8>, S<1, 16, 1, 16>,
8>; 8>;
// clang-format on // clang-format on
......
...@@ -87,11 +87,15 @@ struct DeviceGemmWmma_CShuffle : public DeviceGemm<ALayout, ...@@ -87,11 +87,15 @@ struct DeviceGemmWmma_CShuffle : public DeviceGemm<ALayout,
static constexpr auto NWaves = NPerBlock / (NRepeat * NPerWmma); static constexpr auto NWaves = NPerBlock / (NRepeat * NPerWmma);
static constexpr auto WmmaK = 16; static constexpr auto WmmaK = 16;
static constexpr auto AEnableLds = NWaves == 1 ? false : true; static constexpr auto AEnableLds_auto = NWaves == 1 ? false : true;
static constexpr auto BEnableLds = MWaves == 1 ? false : true; static constexpr auto BEnableLds_auto = MWaves == 1 ? false : true;
// Unconditional enable double side LDS if uncommented following
// AEnableLds = true; // If true, LDS is used unconditionally
// BEnableLds = true; static constexpr auto AEnableLds_manu = false;
static constexpr auto BEnableLds_manu = false;
static constexpr auto AEnableLds = AEnableLds_auto || AEnableLds_manu;
static constexpr auto BEnableLds = BEnableLds_auto || BEnableLds_manu;
static constexpr auto matrix_padder = static constexpr auto matrix_padder =
MatrixPadder<GemmSpec, index_t, index_t, index_t>{MPerBlock, NPerBlock, KPerBlock}; MatrixPadder<GemmSpec, index_t, index_t, index_t>{MPerBlock, NPerBlock, KPerBlock};
......
...@@ -360,7 +360,6 @@ struct GridwiseGemmPipeline_v1<1, false, true> ...@@ -360,7 +360,6 @@ struct GridwiseGemmPipeline_v1<1, false, true>
} }
}; };
// placeholder
template <> template <>
struct GridwiseGemmPipeline_v1<1, true, false> struct GridwiseGemmPipeline_v1<1, true, false>
{ {
......
...@@ -1401,6 +1401,7 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow ...@@ -1401,6 +1401,7 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow
// apply element-wise operation // apply element-wise operation
element_op_(v_this_row, src_buf[Number<src_offset>{}]); element_op_(v_this_row, src_buf[Number<src_offset>{}]);
// apply intra-row permute.
if constexpr(IntraRowSwizzlePerm) if constexpr(IntraRowSwizzlePerm)
{ {
temp = __builtin_amdgcn_permlane16( temp = __builtin_amdgcn_permlane16(
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment