Commit 7c8e92fa authored by aska-0096
Browse files

tempsave

parent 5d9c964e
......@@ -28,13 +28,13 @@ using DeviceGemmV2Instance =
ADataType, BDataType, CDataType, AccDataType, CShuffleDataType,
PassThrough, PassThrough, PassThrough, GemmDefault,
256,
128, 128,
224, 256,
64, 8, 1,
16, 16,
4, 4,
7, 8,
S<8, 32, 1>, S<1, 0, 2>, S<1, 0, 2>,
2, 8, 8, 0,
S<16, 16, 1>, S<0, 2, 1>, S<0, 2, 1>,
S<8, 32, 1>, S<0, 2, 1>, S<0, 2, 1>,
1, 8, 8, 1,
1, 2, S<1, 32, 1, 8>, 8,
ck::BlockGemmPipelineScheduler::Intrawave,ck::BlockGemmPipelineVersion::v3>;
......
......@@ -752,12 +752,18 @@ struct GridwiseGemm_xdl_cshuffle_v3
__device__ static constexpr auto GetBBlockDescriptor_BK0PerBlock_NPerBlock_BK1()
{
// B matrix in LDS memory, dst of blockwise copy
if constexpr(BBlockLdsExtraN)
// if constexpr(BBlockLdsExtraN)
// {
// return make_naive_tensor_descriptor(
// make_tuple(BK0Number, Number<NPerBlock>{}, BK1Number),
// make_tuple(BK1Number, Number<KPerBlock + BBlockLdsExtraN>{}, I1));
// }
// else
if constexpr(BBlockLdsExtraN && is_same<tensor_layout::gemm::RowMajor, BLayout>::value)
{
return make_naive_tensor_descriptor(
make_tuple(BK0Number, Number<NPerBlock>{}, BK1Number),
// make_tuple(BK1Number, Number<KPerBlock + BBlockLdsExtraN>{}, I1));
make_tuple(BK1Number, Number<KPerBlock>{}, I1));
make_tuple(BK1Number * Number<NPerBlock>{}, I1, Number<NPerBlock>{}));
}
else if constexpr(is_same<tensor_layout::gemm::ColumnMajor, BLayout>::value)
{
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment