Commit 6b177d67 authored by coderfeli
Browse files

fix swizzle = false

parent 47648412
...@@ -139,7 +139,7 @@ static constexpr ck::index_t BLOCKSIZE = 256; ...@@ -139,7 +139,7 @@ static constexpr ck::index_t BLOCKSIZE = 256;
static constexpr ck::index_t NPerBlock = 128; static constexpr ck::index_t NPerBlock = 128;
static constexpr ck::index_t MNPerXDL = 32; static constexpr ck::index_t MNPerXDL = 32;
static constexpr ck::index_t KPerBlock = 128 / sizeof(A0DataType); static constexpr ck::index_t KPerBlock = 128 / sizeof(A0DataType);
static constexpr ck::index_t Nswizzle = true; static constexpr ck::index_t Nswizzle = false;
static constexpr ck::index_t AK1 = 16 / sizeof(A0DataType); static constexpr ck::index_t AK1 = 16 / sizeof(A0DataType);
static constexpr ck::index_t BK1 = 16 / sizeof(B0DataType); static constexpr ck::index_t BK1 = 16 / sizeof(B0DataType);
static constexpr ck::index_t EVec = 16 / sizeof(EDataType); static constexpr ck::index_t EVec = 16 / sizeof(EDataType);
......
...@@ -1155,12 +1155,11 @@ struct GridwiseMoeGemm ...@@ -1155,12 +1155,11 @@ struct GridwiseMoeGemm
c_grid_desc_m_n, problem.MBlock, problem.NBlock); c_grid_desc_m_n, problem.MBlock, problem.NBlock);
const index_t max_token_id = __builtin_amdgcn_readfirstlane(p_max_token_id[0]); const index_t max_token_id = __builtin_amdgcn_readfirstlane(p_max_token_id[0]);
// constexpr int expert_tile_cnt[8] = {2, 1, 1, 2, 2, 2, 1, 2}; // constexpr int expert_tile_cnt[8] = {2, 1, 1, 2, 2, 2, 1, 2};
const index_t expert_block_id = blockIdx.x / problem.NBlock;
// const index_t b_block_id = blockIdx.x % problem.NBlock; // const index_t b_block_id = blockIdx.x % problem.NBlock;
const index_t expert_id = __builtin_amdgcn_readfirstlane(p_sorted_expert_ids[expert_block_id]);
const auto block_mn = [&]() -> std::pair<int, int> { const auto block_mn = [&]() -> std::pair<int, int> {
if constexpr (NSwizzle) if constexpr (NSwizzle)
{ {
const index_t expert_block_id = blockIdx.x / problem.NBlock;
const index_t es = __builtin_amdgcn_readfirstlane(p_max_token_id[expert_block_id + 1]); const index_t es = __builtin_amdgcn_readfirstlane(p_max_token_id[expert_block_id + 1]);
const index_t expert_swizzle = es > 0 ? es : 1; //p_max_token_id[expert_id + 1]; const index_t expert_swizzle = es > 0 ? es : 1; //p_max_token_id[expert_id + 1];
const index_t expert_block_swizzle = expert_block_id / expert_swizzle; const index_t expert_block_swizzle = expert_block_id / expert_swizzle;
...@@ -1174,6 +1173,7 @@ struct GridwiseMoeGemm ...@@ -1174,6 +1173,7 @@ struct GridwiseMoeGemm
}(); }();
const index_t block_n_id = block_mn.first; const index_t block_n_id = block_mn.first;
const index_t block_m_id = block_mn.second; const index_t block_m_id = block_mn.second;
const index_t expert_id = __builtin_amdgcn_readfirstlane(p_sorted_expert_ids[block_m_id]);
// if (threadIdx.x==0) { // if (threadIdx.x==0) {
// printf("bid %d, eid %d, es %d, esi %d, bsi %d, m %d, n %d\n", blockIdx.x, expert_id, expert_swizzle, expert_block_swizzle, b_block_id_swizzle, block_m_id, block_n_id); // printf("bid %d, eid %d, es %d, esi %d, bsi %d, m %d, n %d\n", blockIdx.x, expert_id, expert_swizzle, expert_block_swizzle, b_block_id_swizzle, block_m_id, block_n_id);
// } // }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment