Commit 84b4ada5 authored by aska-0096
Browse files

gemm sanity fix

parent 6a9d7b64
...@@ -343,7 +343,7 @@ struct GridwiseGemmPipeline_v1<1, false, true> ...@@ -343,7 +343,7 @@ struct GridwiseGemmPipeline_v1<1, false, true>
b_blockwise_copy.RunWrite(b_block_desc, b_block_buf); b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
// a_block_buf = a_block_buf_switch; a_block_buf = a_block_buf_switch;
++i; ++i;
} while(i < (num_loop - 1)); } while(i < (num_loop - 1));
} }
......
...@@ -1398,23 +1398,23 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow ...@@ -1398,23 +1398,23 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow
if constexpr(IntraRowSwizzlePerm) if constexpr(IntraRowSwizzlePerm)
{ {
// temp = __builtin_amdgcn_permlane16( temp = __builtin_amdgcn_permlane16(
// temp, temp,
// type_convert<int>(v_this_row), type_convert<int>(v_this_row),
// 0xb3a29180, 0xb3a29180,
// 0xf7e6d5c4, 0xf7e6d5c4,
// 1, 1,
// 0); 0);
v_this_row = type_convert<SrcData>(temp); v_this_row = type_convert<SrcData>(temp);
} }
// apply inter-row permute. // apply inter-row permute.
// temp = __builtin_amdgcn_permlanex16(temp, temp = __builtin_amdgcn_permlanex16(temp,
// type_convert<int>(v_this_row), type_convert<int>(v_this_row),
// LowEightRowlaneIdx, LowEightRowlaneIdx,
// HighEightRowLaneIdx, HighEightRowLaneIdx,
// 1, 1,
// 0); 0);
v_theother_row = type_convert<SrcData>(temp); v_theother_row = type_convert<SrcData>(temp);
if(get_thread_local_1d_id() % 32 < 16) if(get_thread_local_1d_id() % 32 < 16)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment