"vscode:/vscode.git/clone" did not exist on "0a0aa07747cf851228598334008366ff2a8fd760"
Unverified commit 608bb144, authored by 22quinn, committed by GitHub
Browse files

[Attention] Remove max cudagraph size limit of 992 (#27840)


Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com>
parent 4a36681f
...@@ -244,13 +244,6 @@ class FlashAttentionMetadataBuilder(AttentionMetadataBuilder[FlashAttentionMetad ...@@ -244,13 +244,6 @@ class FlashAttentionMetadataBuilder(AttentionMetadataBuilder[FlashAttentionMetad
self.max_cudagraph_size = self.compilation_config.max_cudagraph_capture_size self.max_cudagraph_size = self.compilation_config.max_cudagraph_capture_size
if self.use_full_cuda_graph and self.aot_schedule: if self.use_full_cuda_graph and self.aot_schedule:
if self.max_cudagraph_size > 992:
# This condition derives from FA3's internal heuristic.
# TODO(woosuk): Support larger cudagraph sizes.
raise ValueError(
"Capture size larger than 992 is not supported for full cuda graph."
)
self.scheduler_metadata = torch.zeros( self.scheduler_metadata = torch.zeros(
vllm_config.scheduler_config.max_num_seqs + 1, vllm_config.scheduler_config.max_num_seqs + 1,
dtype=torch.int32, dtype=torch.int32,
......
...@@ -97,13 +97,6 @@ class FlashAttnMLAMetadataBuilder(MLACommonMetadataBuilder[FlashAttnMLAMetadata] ...@@ -97,13 +97,6 @@ class FlashAttnMLAMetadataBuilder(MLACommonMetadataBuilder[FlashAttnMLAMetadata]
self.max_cudagraph_size = self.compilation_config.max_cudagraph_capture_size self.max_cudagraph_size = self.compilation_config.max_cudagraph_capture_size
if self.use_full_cuda_graph and self.fa_aot_schedule: if self.use_full_cuda_graph and self.fa_aot_schedule:
if self.max_cudagraph_size > 992:
# This condition derives from FA3's internal heuristic.
# TODO(woosuk): Support larger cudagraph sizes.
raise ValueError(
"Capture size larger than 992 is not supported for full cuda graph."
)
self.scheduler_metadata = torch.zeros( self.scheduler_metadata = torch.zeros(
vllm_config.scheduler_config.max_num_seqs + 1, vllm_config.scheduler_config.max_num_seqs + 1,
dtype=torch.int32, dtype=torch.int32,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment