Commit 84b4ada5 authored by aska-0096
Browse files

gemm sanity fix

parent 6a9d7b64
......@@ -343,7 +343,7 @@ struct GridwiseGemmPipeline_v1<1, false, true>
b_blockwise_copy.RunWrite(b_block_desc, b_block_buf);
// a_block_buf = a_block_buf_switch;
a_block_buf = a_block_buf_switch;
++i;
} while(i < (num_loop - 1));
}
......
......@@ -1398,23 +1398,23 @@ struct ThreadwiseTensorSliceTransfer_StaticToStatic_InterRow
if constexpr(IntraRowSwizzlePerm)
{
// temp = __builtin_amdgcn_permlane16(
// temp,
// type_convert<int>(v_this_row),
// 0xb3a29180,
// 0xf7e6d5c4,
// 1,
// 0);
temp = __builtin_amdgcn_permlane16(
temp,
type_convert<int>(v_this_row),
0xb3a29180,
0xf7e6d5c4,
1,
0);
v_this_row = type_convert<SrcData>(temp);
}
// apply inter-row permute.
// temp = __builtin_amdgcn_permlanex16(temp,
// type_convert<int>(v_this_row),
// LowEightRowlaneIdx,
// HighEightRowLaneIdx,
// 1,
// 0);
temp = __builtin_amdgcn_permlanex16(temp,
type_convert<int>(v_this_row),
LowEightRowlaneIdx,
HighEightRowLaneIdx,
1,
0);
v_theother_row = type_convert<SrcData>(temp);
if(get_thread_local_1d_id() % 32 < 16)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment