Commit 41ee82a3 authored by aska-0096's avatar aska-0096
Browse files

reproduce the scratch

parent 9d16e558
...@@ -36,14 +36,14 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle ...@@ -36,14 +36,14 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
CElementOp, CElementOp,
GemmDefault, GemmDefault,
256, // BlockSize 256, // BlockSize
256, // MPerBlock 128, // MPerBlock
32, // NPerBlock 128, // NPerBlock
32, // KPerBlock 32, // KPerBlock
8, // K1 8, // K1
16, // MPerWmma 16, // MPerWmma
16, // NPerWmma 16, // NPerWmma
4, // M Repeat 1, // M Repeat
1, // N-Repeat 8, // N-Repeat
S<4, 64, 1>, S<4, 64, 1>,
S<1, 0, 2>, S<1, 0, 2>,
S<1, 0, 2>, S<1, 0, 2>,
...@@ -51,15 +51,15 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle ...@@ -51,15 +51,15 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
8, 8,
8, 8,
true, true,
S<4, 32, 1>, S<4, 64, 1>,
S<1, 0, 2>, S<1, 0, 2>,
S<1, 0, 2>, S<1, 0, 2>,
2, 2,
8, 8,
8, 8,
true, true,
4, // C shuffle (M Repeat) Per store 1, // C shuffle (M Repeat) Per store
1, // C shuffle (N Repeat) Per store 4, // C shuffle (N Repeat) Per store
S<1, 64, 1, 4>, S<1, 64, 1, 4>,
8>; 8>;
// clang-format on // clang-format on
......
...@@ -396,7 +396,8 @@ struct BlockwiseGemmWMMA ...@@ -396,7 +396,8 @@ struct BlockwiseGemmWMMA
A_K1, A_K1,
0x76543210, 0x76543210,
0xfedcba98, 0xfedcba98,
true>; // true
false>;
}; };
template <bool EnableLds> template <bool EnableLds>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment