Update max_seq_len_to_capture type annotation from bool to int

988fc31c · zhuwenwen · 72273242 · 988fc31c
Commit 988fc31c authored Jul 22, 2025 by zhuwenwen
Show whitespace changes
Inline Side-by-side

Showing with 1 addition and 2 deletions

vllm/config.py vllm/config.py +1 -2

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -313,8 +313,7 @@ class ModelConfig:
    graph and always execute the model in eager mode. If False, we will use
    CUDA graph and eager execution in hybrid for maximal performance and
    flexibility."""
-    # max_seq_len_to_capture: int = 8192
-    max_seq_len_to_capture: bool = None
+    max_seq_len_to_capture: int = None # 8192
    """Maximum sequence len covered by CUDA graphs. When a sequence has context
    length larger than this, we fall back to eager mode. Additionally for
    encoder-decoder models, if the sequence length of the encoder input is