加入print_param

2ff5a773 · zhanghj2 · 79096f6b · 2ff5a773 · 2ff5a773
Commit 2ff5a773 authored Mar 20, 2026 by zhanghj2
Hide whitespace changes
Inline Side-by-side

Showing with 27 additions and 2 deletions

csrc/api/dense_decode.h csrc/api/dense_decode.h +15 -1

csrc/api/sparse_decode.h csrc/api/sparse_decode.h +12 -1

No files found.
--- a/csrc/api/dense_decode.h
+++ b/csrc/api/dense_decode.h
@@ -121,7 +121,21 @@ dense_attn_decode_interface(
        KU_CHECK_SHAPE(tile_scheduler_metadata, num_sm_parts, sizeof(DecodingSchedMeta)/sizeof(int));
        KU_CHECK_SHAPE(num_splits, batch_size+1);
    }
-
+    bool print_param = (std::getenv("FLASH_MLA_PRINT_PARAM") != nullptr);
+    if (print_param) {
+        fprintf(stderr, "[FlashMLA] [dense_attn_decode_interface] [%s] batch_size = %d seqlen_q_ori = %d "
+            "num_heads_q = %d head_size_k = %d max_num_blocks_per_seq = %d num_blocks %d page_block_size = %d num_heads_k = %d \n",
+            arch.archName.c_str(), 
+            batch_size,
+            seqlen_q_ori,
+            num_heads_q,
+            head_size_k,
+            max_num_blocks_per_seq,
+            num_blocks,
+            page_block_size,
+            num_heads_k
+        );
+    }
    // Set the sizes
    DenseAttnDecodeParams params;
    params.b = batch_size;

--- a/csrc/api/sparse_decode.h
+++ b/csrc/api/sparse_decode.h
@@ -228,7 +228,18 @@ sparse_attn_decode_interface(
    } else {
        TORCH_CHECK(false, "Unsupported d_qk: ", d_qk);
    }
-
+    bool print_param = (std::getenv("FLASH_MLA_PRINT_PARAM") != nullptr);
+    if (print_param) {
+        fprintf(stderr, "[FlashMLA] [sparse_attn_decode_interface] b = %d s_q = %d  h_q = %d d_qk = %d num_blocks = %d page_block_size = %d h_kv = %d topk = %d " 
+            "bytes_per_token = %ld have_topk_length = %d "
+            "have_extra_kcache = %d have_extra_topk_length = %d have_attn_sink = %d \n", 
+            b, s_q, h_q, d_qk, num_blocks, page_block_size, h_kv, topk, kv.stride(1),
+            have_topk_length,
+            have_extra_kcache,
+            have_extra_topk_length,
+            have_attn_sink
+        );
+    }
    std::vector<DecodeFeatures> features;
    if (h_q <= 16) {
        features.push_back(DecodeFeatures::HEAD_16);