"...composable_kernel.git" did not exist on "7c24654c248bbcc003062302a64c3f08d8422906"
Commit 7bc31426 authored by carlushuang

fix mock token id

parent c35bb816
@@ -85,17 +85,6 @@ void reference_fused_moe(
     ck_tile::index_t intermediate_size_0 = intermediate_size;
     ck_tile::index_t intermediate_size_1 = intermediate_size / (gate_only ? 1 : 2);
-    // TODO: better remove this in the future, or modify the token_id value
-    auto get_topk_id = [&](ck_tile::index_t token_id_, ck_tile::index_t expert_id_) {
-        for(ck_tile::index_t i_ = 0; i_ < topk; i_++)
-        {
-            if(token_ids_host(token_id_, i_) == expert_id_)
-                return i_;
-        }
-        throw std::runtime_error("not correct token/expert pair\n");
-        return -1; // TODO: not correct!!
-    };
     ck_tile::HostTensor<AccDataType> out_topk_tokens({tokens, topk, hidden_size});
     int max_num_tokens_padded = topk * tokens + experts * block_m - topk;
@@ -105,11 +94,31 @@ void reference_fused_moe(
     if(i_tile >= num_sorted_tiles)
         return;
     ck_tile::index_t i_expert = sorted_expert_ids_host.mData[i_tile];
-    ck_tile::index_t i_token = sorted_token_ids_host.mData[i_flatten];
+#if CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
+    ck_tile::index_t i_token = sorted_token_ids_host.mData[i_flatten];
+    ck_tile::index_t i_topk = i_token >> 24;
+    i_token &= 0xffffff;
+    if(i_token >= tokens)
+        return;
+    (void)token_ids_host;
+#else
+    // TODO: better remove this in the future, or modify the token_id value
+    auto get_topk_id = [&](ck_tile::index_t token_id_, ck_tile::index_t expert_id_) {
+        for(ck_tile::index_t i_ = 0; i_ < topk; i_++)
+        {
+            if(token_ids_host(token_id_, i_) == expert_id_)
+                return i_;
+        }
+        throw std::runtime_error("not correct token/expert pair\n");
+        return -1; // TODO: not correct!!
+    };
+    ck_tile::index_t i_token = sorted_token_ids_host.mData[i_flatten];
     if(i_token >= tokens)
         return;
     ck_tile::index_t i_topk = get_topk_id(i_token, i_expert); // TODO: ugly
+#endif
     auto weight = sorted_weight_host.mData[i_flatten];
     ck_tile::HostTensor<AccDataType> acc_0({1, intermediate_size_0});
     // first gemm
...
@@ -299,6 +299,9 @@ struct FusedMoeGemmKernel
         index_t token_id =
             reinterpret_cast<const index_t*>(kargs.sorted_token_ids_ptr)[sorted_token_id];
+#if CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
+        token_id &= 0xffffff;
+#endif
         auto topk_weight = reinterpret_cast<const TopkWeightDataType*>(
             kargs.sorted_weight_ptr)[sorted_token_id];
...
@@ -125,6 +125,9 @@ struct FusedMoeGemmPipeline_FlatmmUk
         array<index_t, n_size> row_ids;
         static_for<0, n_size, 1>{}([&](auto i) {
             row_ids.at(i) = sorted_token_ids_ptr[coords[i]]; // base_coord + i * MLans;
+#if CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID
+            row_ids.at(i) &= 0xffffff;
+#endif
         });
         return row_ids;
...
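
For context on what the mock id encodes: with CK_TILE_REFERENCE_MOE_SORTING_MOCK_ID enabled, each entry of the sorted token ids appears to carry the topk slot in its upper 8 bits and the token index in its lower 24 bits, which is why the reference path decodes with ">> 24" and every consumer masks with "& 0xffffff". A minimal sketch of that assumed packing follows; the helper names are hypothetical and not taken from the repository.

#include <cassert>
#include <cstdint>

// Hypothetical helpers illustrating the assumed layout: topk slot in bits
// 31..24, token index in bits 23..0 of a 32-bit sorted token id.
inline uint32_t pack_mock_token_id(uint32_t token, uint32_t topk_slot)
{
    assert(token < (1u << 24));
    assert(topk_slot < (1u << 8));
    return (topk_slot << 24) | token;
}

inline void unpack_mock_token_id(uint32_t packed, uint32_t& token, uint32_t& topk_slot)
{
    topk_slot = packed >> 24;      // matches "i_topk = i_token >> 24" in the reference
    token     = packed & 0xffffff; // matches the "& 0xffffff" masks in kernel and pipeline
}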