Commit 8c96d505 authored by 王敏
Browse files

[Perf]支持DSA架构模型开启MTP后运行full graph模式

parent 6efaf21a
...@@ -201,7 +201,7 @@ def get_max_prefill_buffer_size(vllm_config: VllmConfig): ...@@ -201,7 +201,7 @@ def get_max_prefill_buffer_size(vllm_config: VllmConfig):
class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder): class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder):
_cudagraph_support: ClassVar[AttentionCGSupport] = ( _cudagraph_support: ClassVar[AttentionCGSupport] = (
AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE AttentionCGSupport.UNIFORM_BATCH
) )
reorder_batch_threshold: int = 1 reorder_batch_threshold: int = 1
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment