更改默认的 full_cuda_graph 启动方式为 False

3dad13fb · gaoqiong · 04b61f0e · 3dad13fb
Commit 3dad13fb authored Aug 10, 2025 by gaoqiong
Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 2 deletions

vllm/config.py vllm/config.py +2 -2

No files found.
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -4106,7 +4106,7 @@ class CompilationConfig:
    are always used, it can set this to False. Otherwise, it should
    set this to True, and the compiler will copy the input to an
    internally managed buffer. Default is False."""
-    full_cuda_graph: bool = True
+    full_cuda_graph: bool = False
    """whether to use a full cuda graph for the entire forward pass rather than
    splitting certain operations such as attention into subgraphs. Thus this
    flag cannot be used together with splitting_ops. This may provide
@@ -4948,4 +4948,4 @@ def get_layers_from_vllm_config(vllm_config: VllmConfig,
        for layer_name, layer in
        vllm_config.compilation_config.static_forward_context.items()
        if isinstance(layer, layer_type)
    }
\ No newline at end of file