[Config] Remove Unused Environment Variable `VLLM_DISABLE_PAD_FOR_CUDAGRAPH` (#26743)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Config] Remove Unused Environment Variable `VLLM_DISABLE_PAD_FOR_CUDAGRAPH` (#26743)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
6d87a283 · Wentao Ye · GitHub · e6cdbd67 · 6d87a283 · 6d87a283
Unverified commit 6d87a283, authored Oct 14, 2025 by Wentao Ye and committed by GitHub on Oct 14, 2025
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 0 additions and 8 deletions

vllm/envs.py vllm/envs.py +0 -7

vllm/v1/worker/gpu_model_runner.py vllm/v1/worker/gpu_model_runner.py +0 -1

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -198,7 +198,6 @@ if TYPE_CHECKING:
    VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS: bool = False
    VLLM_ALLREDUCE_USE_SYMM_MEM: bool = True
    VLLM_TUNED_CONFIG_FOLDER: str | None = None
-    VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False
    VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False
    VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False
    VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False
@@ -1304,12 +1303,6 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_ENABLE_CUDAGRAPH_GC": lambda: bool(
        int(os.getenv("VLLM_ENABLE_CUDAGRAPH_GC", "0"))
    ),
-    # Disable padding to CUDA graph capture batch sizes.
-    # TODO(wentao): https://github.com/vllm-project/vllm/issues/23378
-    # After the issue is fixed, we can remove this flag.
-    "VLLM_DISABLE_PAD_FOR_CUDAGRAPH": lambda: bool(
-        int(os.getenv("VLLM_DISABLE_PAD_FOR_CUDAGRAPH", "0"))
-    ),
    # Used to force set up loopback IP
    "VLLM_LOOPBACK_IP": lambda: os.getenv("VLLM_LOOPBACK_IP", ""),
    # Used to set the process name prefix for vLLM processes.

--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2067,7 +2067,6 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
    def _get_num_input_tokens(self, num_scheduled_tokens: int) -> int:
        if (
            self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE
-            and not envs.VLLM_DISABLE_PAD_FOR_CUDAGRAPH
            and hasattr(self, "cudagraph_batch_sizes")
            and self.cudagraph_batch_sizes
            and num_scheduled_tokens <= self.cudagraph_batch_sizes[-1]