[moe] Use enable_chunking func (to support disabling chunking) (#29935)

Signed-off-by: Ming Yang <minos.future@gmail.com>

[moe] Use enable_chunking func (to support disabling chunking) (#29935)
Signed-off-by: Ming Yang <minos.future@gmail.com>
8372be28 · Ming Yang · GitHub · 8da6ae49 · 8372be28
Unverified commit 8372be28, authored Dec 18, 2025 by Ming Yang; committed by GitHub on Dec 18, 2025.
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 2 additions and 2 deletions

vllm/model_executor/layers/fused_moe/modular_kernel.py vllm/model_executor/layers/fused_moe/modular_kernel.py +2 -2

No files found.
--- a/vllm/model_executor/layers/fused_moe/modular_kernel.py
+++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py
@@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module):
            1,
            (
                M
-                if not self.fused_experts.supports_chunking()
+                if not self.fused_experts.enable_chunking()
                else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
            ),
        )
@@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module):
            is_forward_context_available()
            and get_forward_context().attn_metadata is None
        )
-        if is_profile_run and self.fused_experts.supports_chunking() and self.is_dp_ep:
+        if is_profile_run and self.fused_experts.enable_chunking() and self.is_dp_ep:
            max_workspace_13, max_workspace_2, max_fused_out_shape = (
                self.fused_experts.workspace_shapes(
                    envs.VLLM_FUSED_MOE_CHUNK_SIZE,