Add VLLM_USE_PIECEWISE env var to use piecewise cudagraph mode

9be76efd · zhuwenwen · 77599fa7 · 9be76efd · 9be76efd
Commit 9be76efd authored Nov 26, 2025 by zhuwenwen
Hide whitespace changes
Inline Side-by-side

Showing with 8 additions and 1 deletion

vllm/config/compilation.py vllm/config/compilation.py +2 -1

vllm/envs.py vllm/envs.py +6 -0

No files found.
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -14,6 +14,7 @@ from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
 from vllm.config.utils import config
 from vllm.logger import init_logger
 from vllm.utils import is_torch_equal_or_newer, resolve_obj_by_qualname
+from vllm import envs
 if TYPE_CHECKING:
    from vllm.config import VllmConfig
@@ -56,7 +57,7 @@ class CUDAGraphMode(enum.Enum):
    def max_cudagraph_mode(self) -> 'CUDAGraphMode':
-        return CUDAGraphMode(max(
-            self.value)) if self.separate_routine() else self
+        # Return self unchanged when separate_routine() is false.
+        if not self.separate_routine():
+            return self
+        # VLLM_USE_PIECEWISE switches the selection from max() to min()
+        # of this mode's value.
+        pick = min if envs.VLLM_USE_PIECEWISE else max
+        return CUDAGraphMode(pick(self.value))
    def has_full_cudagraphs(self) -> bool:
        """Whether max_cudagraph_mode() is CUDAGraphMode.FULL."""
        return self.max_cudagraph_mode() == CUDAGraphMode.FULL

--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -237,6 +237,7 @@ if TYPE_CHECKING:
    USE_FUSED_SILU_MUL_QUANT: bool = False
    VLLM_USE_PD_SPLIT: bool = False
    VLLM_USE_PP_SYNC: bool = False
+    # Matches the runtime default ("True") used in environment_variables.
+    VLLM_USE_PIECEWISE: bool = True
 def get_default_cache_root():
@@ -1648,6 +1649,11 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_USE_PP_SYNC":
        lambda: (os.environ.get("VLLM_USE_PP_SYNC", "False").lower() in
                 ("true", "1")), 
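+    # If true (the default), CUDAGraphMode.max_cudagraph_mode() takes min()
+    # instead of max() of a separate-routine mode's value.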
+    "VLLM_USE_PIECEWISE":
+        lambda: (os.environ.get("VLLM_USE_PIECEWISE", "True").lower() in
+                 ("true", "1")), 
 }
 # --8<-- [end:env-vars-definition]