[XPU] Use spawn with XPU multiprocessing (#20649)

Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>

[XPU] Use spawn with XPU multiprocessing (#20649)
Signed-off-by: Dmitry Rogozhkin <dmitry.v.rogozhkin@intel.com>
e760fcef · Dmitry Rogozhkin · GitHub · 6bbf1795 · e760fcef · e760fcef
Commit e760fcef (signature unverified), authored Jul 09, 2025 by Dmitry Rogozhkin; committed by GitHub on Jul 09, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 15 additions and 5 deletions

tests/utils.py +4 -3

tests/v1/e2e/test_cascade_attention.py +2 -2

vllm/utils/__init__.py +9 -0

No files found.
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -818,14 +818,15 @@ def create_new_process_for_each_test(
    Args:
        method: The process creation method. Can be either "spawn" or "fork". 
-               If not specified,
-               it defaults to "spawn" on ROCm platforms and "fork" otherwise.
+               If not specified, it defaults to "spawn" on ROCm and XPU
+               platforms and "fork" otherwise.
    Returns:
        A decorator to run test functions in separate processes.
    """
    if method is None:
-        method = "spawn" if current_platform.is_rocm() else "fork"
+        use_spawn = current_platform.is_rocm() or current_platform.is_xpu()
+        method = "spawn" if use_spawn else "fork"
    assert method in ["spawn",
                      "fork"], "Method must be either 'spawn' or 'fork'"

--- a/tests/v1/e2e/test_cascade_attention.py
+++ b/tests/v1/e2e/test_cascade_attention.py
@@ -5,10 +5,10 @@ import pytest
 from vllm import LLM, SamplingParams
-from ...utils import fork_new_process_for_each_test
+from ...utils import create_new_process_for_each_test
-@fork_new_process_for_each_test
+@create_new_process_for_each_test()
 @pytest.mark.parametrize("attn_backend",
                         ["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"])
 def test_cascade_attention(example_system_message, monkeypatch, attn_backend):

--- a/vllm/utils/__init__.py
+++ b/vllm/utils/__init__.py
@@ -1535,6 +1535,13 @@ def cuda_is_initialized() -> bool:
    return torch.cuda.is_initialized()
+def xpu_is_initialized() -> bool:
+    """Check if XPU is initialized."""
+    if not torch.xpu._is_compiled():
+        return False
+    return torch.xpu.is_initialized()
 def cuda_get_device_properties(device,
                               names: Sequence[str],
                               init_cuda=False) -> tuple[Any, ...]:
@@ -2848,6 +2855,8 @@ def _maybe_force_spawn():
    reason = None
    if cuda_is_initialized():
        reason = "CUDA is initialized"
+    elif xpu_is_initialized():
+        reason = "XPU is initialized"
    elif is_in_ray_actor():
        # even if we choose to spawn, we need to pass the ray address
        # to the subprocess so that it knows how to connect to the ray cluster.