Fix env vars for running Ray distributed backend on GKE (#15166)

Signed-off-by: Richard Liu <ricliu@google.com>

Fix env vars for running Ray distributed backend on GKE (#15166)
Signed-off-by: Richard Liu <ricliu@google.com>
a8f12a63 · Richard Liu · GitHub · 69ae2380 · a8f12a63 · a8f12a63
Unverified commit a8f12a63, authored Mar 20, 2025 by Richard Liu; committed by GitHub on Mar 20, 2025.
Showing 3 changed files with 8 additions and 0 deletions

vllm/executor/ray_distributed_executor.py +2 -0

vllm/platforms/interface.py +2 -0

vllm/platforms/tpu.py +4 -0

No files found.
--- a/vllm/executor/ray_distributed_executor.py
+++ b/vllm/executor/ray_distributed_executor.py
@@ -340,6 +340,8 @@ class RayDistributedExecutor(DistributedExecutorBase):
            and v not in self.non_carry_over_env_vars
        ]
+        env_vars_to_copy.extend(current_platform.additional_env_vars)
        # Copy existing env vars to each worker's args
        for args in all_args_to_update_environment_variables:
            # TODO: refactor platform-specific env vars

--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -112,6 +112,8 @@ class Platform:
    supported_quantization: list[str] = []
+    additional_env_vars: list[str] = []
    def is_cuda(self) -> bool:
        return self._enum == PlatformEnum.CUDA

--- a/vllm/platforms/tpu.py
+++ b/vllm/platforms/tpu.py
@@ -29,6 +29,10 @@ class TpuPlatform(Platform):
        "tpu_int8", "compressed-tensors", "compressed_tensors"
    ]
+    additional_env_vars: list[str] = [
+        "TPU_CHIPS_PER_HOST_BOUNDS", "TPU_HOST_BOUNDS"
+    ]
    @classmethod
    def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int,
                             dtype: torch.dtype, kv_cache_dtype: Optional[str],