Commit 82bb8dde authored by Jing Zhang
Browse files

fixed splitk

parent 65cfb2a1
...@@ -59,7 +59,6 @@ __device__ inline half2_t pki4_to_half2(pk_i4_t q) ...@@ -59,7 +59,6 @@ __device__ inline half2_t pki4_to_half2(pk_i4_t q)
int x_h = (x_u8 & 0xf0) << 12; int x_h = (x_u8 & 0xf0) << 12;
const int EX = 0x64006400; const int EX = 0x64006400;
const int SUB = 0xE408E408; //-8 const int SUB = 0xE408E408; //-8
int lo = (x_l | x_h) | EX; int lo = (x_l | x_h) | EX;
......
...@@ -601,7 +601,7 @@ struct GridwiseGemm_xdl_cshuffle_v3 ...@@ -601,7 +601,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
{ {
if constexpr(is_same_v<tensor_layout::gemm::RowMajor, ALayout>) if constexpr(is_same_v<tensor_layout::gemm::RowMajor, ALayout>)
{ {
a_k_split_offset = blockIdx.z * karg.KRead; a_k_split_offset = blockIdx.z * karg.KRead / APackedSize;
} }
else if constexpr(is_same_v<tensor_layout::gemm::ColumnMajor, ALayout>) else if constexpr(is_same_v<tensor_layout::gemm::ColumnMajor, ALayout>)
{ {
...@@ -614,7 +614,7 @@ struct GridwiseGemm_xdl_cshuffle_v3 ...@@ -614,7 +614,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
} }
else if constexpr(is_same_v<tensor_layout::gemm::ColumnMajor, BLayout>) else if constexpr(is_same_v<tensor_layout::gemm::ColumnMajor, BLayout>)
{ {
b_k_split_offset = blockIdx.z * karg.KRead; b_k_split_offset = blockIdx.z * karg.KRead / BPackedSize;
} }
if(blockIdx.z < static_cast<uint32_t>(karg.KBatch - 1)) if(blockIdx.z < static_cast<uint32_t>(karg.KBatch - 1))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment