Fix error message

7f55c715 · Jiashi Li · e9b67321 · 7f55c715 · 7f55c715 · 7f55c715
Commit 7f55c715 authored Sep 30, 2025 by Jiashi Li
3 changed files
--- a/csrc/pybind.cpp
+++ b/csrc/pybind.cpp
@@ -41,7 +41,7 @@ struct Arch {
    }
 };
-// DecodingAttnImplMeta - A struct to hold metadata for Decoding Attention Implementation (i.e. Hopper Dense BF16, Hopper Sparse FP8, etc.)
+// DecodingAttnImplMeta - A struct to hold metadata for a Decoding Attention Implementation (e.g. SM90 Dense BF16, SM90 Sparse FP8, etc.)
 struct DecodingAttnImplMeta {
    int num_sm_parts;
    int fixed_overhead_num_blocks;
@@ -334,7 +334,7 @@ fwd_kvcache_mla(
                TORCH_CHECK(q_dtype == torch::kBFloat16, "Sparse FP8 MLA only supports BFloat16 on SM90");
                sm90::run_flash_splitkv_mla_fp8_sparse_kernel(params, stream);
            } else {
-                TORCH_CHECK(false, "Dense FP8 MLA is not supported on SM90");
+                TORCH_CHECK(false, "Only FP8 kvcache is supported for sparse MLA on SM90");
            }
        } else {
            if (is_fp8) {
@@ -347,7 +347,7 @@ fwd_kvcache_mla(
                    sm90::run_flash_splitkv_mla_kernel<cutlass::half_t>(params, stream);
 #endif
                } else {
-                    TORCH_CHECK(false, "Unsupported tensor dtype for query");
+                    TORCH_CHECK(false, "Unsupported dtype for dense MLA on SM90");
                }
            }
        }

--- a/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp
+++ b/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_kernel_tma_warpspecialized.hpp
@@ -949,7 +949,7 @@ struct Sm100FmhaBwdKernelTmaWarpSpecialized {
      TensorC const& coord,
      TensorShape const& tensor_shape) {
-    //TODO Performance of FlashMLA on hopper is dropped with latest cutlass, so here revert the to the old version.
+    // TODO: Performance of FlashMLA on sm90 drops with the latest cutlass, so revert to the old version here.
    // Tensor preds = cute::lazy::transform(coord, [&](auto const& c) { return elem_less(c, tensor_shape); });
    auto copy_op = make_cotiled_copy(

--- a/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp
+++ b/csrc/sm100/prefill/dense/kernel/sm100_fmha_bwd_mla_kernel_tma_warpspecialized.hpp
@@ -953,7 +953,8 @@ struct Sm100FmhaBwdMlaKernelTmaWarpSpecialized {
      TensorR const& regs,
      TensorC const& coord,
      TensorShape const& tensor_shape) {
-    //TODO Performance of FlashMLA on hopper is dropped with latest cutlass, so here revert the to the old version.
+    // TODO: Performance of FlashMLA on sm90 drops with the latest cutlass, so revert to the old version here.
    // Tensor preds = cute::lazy::transform(coord, [&](auto const& c) { return elem_less(c, tensor_shape); });
    auto copy_op = make_cotiled_copy(