Directly get max encoder len from VLLM config in V1 (#24866)

Signed-off-by: Sugar-zsg <952242923@qq.com>

Directly get max encoder len from VLLM config in V1 (#24866)
Signed-off-by: Sugar-zsg <952242923@qq.com>
cd1f885b · Sugar · GitHub · d593cf28 · cd1f885b
Unverified Commit cd1f885b authored Sep 17, 2025 by Sugar Committed by GitHub Sep 16, 2025
Show whitespace changes
Inline Side-by-side

Showing with 7 additions and 4 deletions

vllm/attention/layers/cross_attention.py vllm/attention/layers/cross_attention.py +7 -4

No files found.
--- a/vllm/attention/layers/cross_attention.py
+++ b/vllm/attention/layers/cross_attention.py
@@ -14,7 +14,6 @@ from vllm.attention.layer import Attention
 from vllm.attention.selector import get_attn_backend
 from vllm.config import CacheConfig, VllmConfig
 from vllm.logger import init_logger
-from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.utils import cdiv
 from vllm.v1.attention.backends.utils import (CommonAttentionMetadata,
                                              subclass_attention_backend)
@@ -23,9 +22,13 @@ from vllm.v1.kv_cache_interface import CrossAttentionSpec
 logger = init_logger(__name__)


-def _get_max_encoder_len(vllm_config: VllmConfig) -> int:
-    return MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(
-        vllm_config.model_config)
+def _get_max_encoder_len(vllm_config: "VllmConfig") -> int:
+    """Return the scheduler config's `max_num_encoder_input_tokens`.
+    Asserts that the scheduler config is truthy and the value is an int."""
+    sc = vllm_config.scheduler_config
+    assert sc and isinstance(sc.max_num_encoder_input_tokens, int), \
+        "max_num_encoder_input_tokens must be int for enc-dec models"
+    return sc.max_num_encoder_input_tokens


 def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray,