[Bug] Fix env string `"0"` being treated as `True` (#28159)

Signed-off-by: yewentao256 <zhyanwentao@126.com>

[Bug] Fix env string `"0"` being treated as `True` (#28159)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
90189c71 · Wentao Ye · GitHub · d79d9f07 · 90189c71
Unverified Commit 90189c71 authored Nov 05, 2025 by Wentao Ye Committed by GitHub Nov 05, 2025
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 4 deletions

vllm/envs.py vllm/envs.py +6 -4

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -776,7 +776,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
    # If set, the OpenAI API server will stay alive even after the underlying
    # AsyncLLMEngine errors and stops serving requests
    "VLLM_KEEP_ALIVE_ON_ENGINE_DEATH": lambda: bool(
-        os.getenv("VLLM_KEEP_ALIVE_ON_ENGINE_DEATH", 0)
+        int(os.getenv("VLLM_KEEP_ALIVE_ON_ENGINE_DEATH", "0"))
    ),
    # If the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN is set, it allows
    # the user to specify a max sequence length greater than
@@ -1313,7 +1313,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
    ),
    # If set, it means we pre-downloaded cubin files and flashinfer will
    # read the cubin files directly.
-    "VLLM_HAS_FLASHINFER_CUBIN": lambda: os.getenv("VLLM_HAS_FLASHINFER_CUBIN", False),
+    "VLLM_HAS_FLASHINFER_CUBIN": lambda: bool(
+        int(os.getenv("VLLM_HAS_FLASHINFER_CUBIN", "0"))
+    ),
    # Supported options:
    # - "flashinfer-cudnn": use flashinfer cudnn GEMM backend
    # - "flashinfer-trtllm": use flashinfer trtllm GEMM backend
@@ -1449,8 +1451,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
    #                                      top 5 collected objects
    "VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""),
    # Disables parallel execution of shared_experts via separate cuda stream
-    "VLLM_DISABLE_SHARED_EXPERTS_STREAM": lambda: os.getenv(
-        "VLLM_DISABLE_SHARED_EXPERTS_STREAM", False
+    "VLLM_DISABLE_SHARED_EXPERTS_STREAM": lambda: bool(
+        int(os.getenv("VLLM_DISABLE_SHARED_EXPERTS_STREAM", "0"))
    ),
    # Format for saving torch.compile cache artifacts
    # - "binary": saves as binary file