Unverified commit 90189c71, authored by Wentao Ye, committed by GitHub
Browse files

[Bug] Fix env string `"0"` same to `True` (#28159)


Signed-off-by: yewentao256 <zhyanwentao@126.com>
parent d79d9f07
...@@ -776,7 +776,7 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -776,7 +776,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# If set, the OpenAI API server will stay alive even after the underlying # If set, the OpenAI API server will stay alive even after the underlying
# AsyncLLMEngine errors and stops serving requests # AsyncLLMEngine errors and stops serving requests
"VLLM_KEEP_ALIVE_ON_ENGINE_DEATH": lambda: bool( "VLLM_KEEP_ALIVE_ON_ENGINE_DEATH": lambda: bool(
os.getenv("VLLM_KEEP_ALIVE_ON_ENGINE_DEATH", 0) int(os.getenv("VLLM_KEEP_ALIVE_ON_ENGINE_DEATH", "0"))
), ),
# If the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN is set, it allows # If the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN is set, it allows
# the user to specify a max sequence length greater than # the user to specify a max sequence length greater than
...@@ -1313,7 +1313,9 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1313,7 +1313,9 @@ environment_variables: dict[str, Callable[[], Any]] = {
), ),
# If set, it means we pre-downloaded cubin files and flashinfer will # If set, it means we pre-downloaded cubin files and flashinfer will
# read the cubin files directly. # read the cubin files directly.
"VLLM_HAS_FLASHINFER_CUBIN": lambda: os.getenv("VLLM_HAS_FLASHINFER_CUBIN", False), "VLLM_HAS_FLASHINFER_CUBIN": lambda: bool(
int(os.getenv("VLLM_HAS_FLASHINFER_CUBIN", "0"))
),
# Supported options: # Supported options:
# - "flashinfer-cudnn": use flashinfer cudnn GEMM backend # - "flashinfer-cudnn": use flashinfer cudnn GEMM backend
# - "flashinfer-trtllm": use flashinfer trtllm GEMM backend # - "flashinfer-trtllm": use flashinfer trtllm GEMM backend
...@@ -1449,8 +1451,8 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -1449,8 +1451,8 @@ environment_variables: dict[str, Callable[[], Any]] = {
# top 5 collected objects # top 5 collected objects
"VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""), "VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""),
# Disables parallel execution of shared_experts via separate cuda stream # Disables parallel execution of shared_experts via separate cuda stream
"VLLM_DISABLE_SHARED_EXPERTS_STREAM": lambda: os.getenv( "VLLM_DISABLE_SHARED_EXPERTS_STREAM": lambda: bool(
"VLLM_DISABLE_SHARED_EXPERTS_STREAM", False int(os.getenv("VLLM_DISABLE_SHARED_EXPERTS_STREAM", "0"))
), ),
# Format for saving torch.compile cache artifacts # Format for saving torch.compile cache artifacts
# - "binary": saves as binary file # - "binary": saves as binary file
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment