Commit 9d16e558 authored by aska-0096
Browse files

Confirmed compiler sanity when skipping A-LDS

parent a3b86965
...@@ -36,14 +36,14 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle ...@@ -36,14 +36,14 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
CElementOp, CElementOp,
GemmDefault, GemmDefault,
256, // BlockSize 256, // BlockSize
128, // MPerBlock 256, // MPerBlock
128, // NPerBlock 32, // NPerBlock
32, // KPerBlock 32, // KPerBlock
8, // K1 8, // K1
16, // MPerWmma 16, // MPerWmma
16, // NPerWmma 16, // NPerWmma
1, // M Repeat 4, // M Repeat
8, // N-Repeat 1, // N-Repeat
S<4, 64, 1>, S<4, 64, 1>,
S<1, 0, 2>, S<1, 0, 2>,
S<1, 0, 2>, S<1, 0, 2>,
...@@ -51,15 +51,15 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle ...@@ -51,15 +51,15 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmWmma_CShuffle
8, 8,
8, 8,
true, true,
S<4, 64, 1>, S<4, 32, 1>,
S<1, 0, 2>, S<1, 0, 2>,
S<1, 0, 2>, S<1, 0, 2>,
2, 2,
8, 8,
8, 8,
true, true,
1, // C shuffle (M Repeat) Per store 4, // C shuffle (M Repeat) Per store
4, // C shuffle (N Repeat) Per store 1, // C shuffle (N Repeat) Per store
S<1, 64, 1, 4>, S<1, 64, 1, 4>,
8>; 8>;
// clang-format on // clang-format on
......
...@@ -47,7 +47,7 @@ __global__ void ...@@ -47,7 +47,7 @@ __global__ void
{ {
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \ #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx1100__) || defined(__gfx1101__) || \
defined(__gfx1102__)) defined(__gfx1102__))
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()]; __shared__ char p_shared[GridwiseGemm::SharedMemTrait::lds_size];
GridwiseGemm::template Run<HasMainKBlockLoop>(p_a_grid, GridwiseGemm::template Run<HasMainKBlockLoop>(p_a_grid,
p_b_grid, p_b_grid,
...@@ -130,9 +130,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma ...@@ -130,9 +130,7 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
static constexpr auto I5 = Number<5>{}; static constexpr auto I5 = Number<5>{};
static constexpr auto I6 = Number<6>{}; static constexpr auto I6 = Number<6>{};
static constexpr auto I7 = Number<7>{}; static constexpr auto I7 = Number<7>{};
static constexpr auto B_K0 = BGridDesc_K0_N_K1{}.GetLength(I0);
static constexpr auto B_K1 = BGridDesc_K0_N_K1{}.GetLength(I2);
// FIX ME: To be deprecated // FIX ME: To be deprecated
static constexpr auto K1 = Number<K1Value>{}; static constexpr auto K1 = Number<K1Value>{};
...@@ -299,6 +297,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma ...@@ -299,6 +297,8 @@ struct GridwiseGemm_k0mk1_k0nk1_mn_wmma
__host__ __device__ static constexpr auto __host__ __device__ static constexpr auto
MakeBBlockDescriptor_K0_N0_N1_N2_K1(const BBlockDesc_BK0_N_BK1&) MakeBBlockDescriptor_K0_N0_N1_N2_K1(const BBlockDesc_BK0_N_BK1&)
{ {
constexpr auto B_K0 = KPerBlock/K1;
constexpr auto B_K1 = K1;
return transform_tensor_descriptor( return transform_tensor_descriptor(
BBlockDesc_BK0_N_BK1{}, BBlockDesc_BK0_N_BK1{},
make_tuple(make_pass_through_transform(Number<B_K0>{}), make_tuple(make_pass_through_transform(Number<B_K0>{}),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment