Unverified commit 8372be28, authored by Ming Yang, committed by GitHub
Browse files

[moe] Use enable_chunking func (to support disabling chunking) (#29935)


Signed-off-by: Ming Yang <minos.future@gmail.com>
parent 8da6ae49
...@@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module): ...@@ -743,7 +743,7 @@ class FusedMoEModularKernel(torch.nn.Module):
1, 1,
( (
M M
if not self.fused_experts.supports_chunking() if not self.fused_experts.enable_chunking()
else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE) else min(M, envs.VLLM_FUSED_MOE_CHUNK_SIZE)
), ),
) )
...@@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module): ...@@ -786,7 +786,7 @@ class FusedMoEModularKernel(torch.nn.Module):
is_forward_context_available() is_forward_context_available()
and get_forward_context().attn_metadata is None and get_forward_context().attn_metadata is None
) )
if is_profile_run and self.fused_experts.supports_chunking() and self.is_dp_ep: if is_profile_run and self.fused_experts.enable_chunking() and self.is_dp_ep:
max_workspace_13, max_workspace_2, max_fused_out_shape = ( max_workspace_13, max_workspace_2, max_fused_out_shape = (
self.fused_experts.workspace_shapes( self.fused_experts.workspace_shapes(
envs.VLLM_FUSED_MOE_CHUNK_SIZE, envs.VLLM_FUSED_MOE_CHUNK_SIZE,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment