[BugFix] Fix non detected failing tests (#30277)

Signed-off-by: ilmarkov <markovilya197@gmail.com>

[BugFix] Fix non detected failing tests (#30277)
Signed-off-by: ilmarkov <markovilya197@gmail.com>
0b6a8a30 · Ilya Markov · GitHub · 804e3468 · 0b6a8a30 · 0b6a8a30
Unverified Commit 0b6a8a30 authored Dec 09, 2025 by Ilya Markov Committed by GitHub Dec 09, 2025
6 changed files
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -468,7 +468,9 @@ steps:
  # tests covered elsewhere.
  # Use `find` to launch multiple instances of pytest so that
  # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
-  - "find compile/ -maxdepth 1 -name 'test_*.py' -exec pytest -s -v {} \\\\;"
+  # However, `find -exec ... \;` ignores the exit status of the command it runs,
+  # so we pipe the paths to xargs instead (-print0/-0 keeps unusual file names intact)
+  - "find compile/ -maxdepth 1 -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 - label: PyTorch Fullgraph Smoke Test # 15min
  timeout_in_minutes: 30
@@ -482,7 +484,9 @@ steps:
  # as it is a heavy test that is covered in other steps.
  # Use `find` to launch multiple instances of pytest so that
  # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
-  - "find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -exec pytest -s -v {} \\\\;"
+  # However, `find -exec ... \;` ignores the exit status of the command it runs,
+  # so we pipe the paths to xargs instead (-print0/-0 keeps unusual file names intact)
+  - "find compile/fullgraph -maxdepth 1 -name 'test_*.py' -not -name 'test_full_graph.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
 - label: PyTorch Fullgraph Test # 27min
  timeout_in_minutes: 40

--- a/tests/compile/fullgraph/test_multimodal_compile.py
+++ b/tests/compile/fullgraph/test_multimodal_compile.py
@@ -17,7 +17,6 @@ def test_compile():
 # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
 @pytest.mark.forked
 @pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
-@pytest.mark.xfail
 def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
    """Test that Qwen2.5-VL vision submodules are compiled.

--- a/tests/compile/test_compile_ranges.py
+++ b/tests/compile/test_compile_ranges.py
@@ -80,6 +80,8 @@ def test_compile_ranges(use_fresh_inductor_cache):
    vllm_config = VllmConfig(
        scheduler_config=SchedulerConfig(
            max_num_batched_tokens=8192,
+            max_model_len=8192,
+            is_encoder_decoder=False,
        ),
        compilation_config=CompilationConfig(
            mode=CompilationMode.VLLM_COMPILE,
@@ -112,6 +114,8 @@ def test_compile_config_get_compile_ranges():
    VllmConfig(
        scheduler_config=SchedulerConfig(
            max_num_batched_tokens=8192,
+            max_model_len=8192,
+            is_encoder_decoder=False,
        ),
        compilation_config=compilation_config,
    )
@@ -134,6 +138,8 @@ def test_inductor_cache_compile_ranges(monkeypatch, use_fresh_inductor_cache):
    )
    scheduler_config = SchedulerConfig(
        max_num_batched_tokens=8192,
+        max_model_len=8192,
+        is_encoder_decoder=False,
    )
    torch.set_default_device("cuda")

--- a/tests/compile/test_pass_manager.py
+++ b/tests/compile/test_pass_manager.py
@@ -5,9 +5,14 @@ import copy
 import pytest
 import torch
-from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass
+from vllm.compilation.inductor_pass import (
+    CallableInductorPass,
+    InductorPass,
+    pass_context,
+)
 from vllm.compilation.pass_manager import PostGradPassManager
 from vllm.config import ModelConfig, VllmConfig
+from vllm.config.utils import Range
 # dummy custom pass that doesn't inherit
@@ -42,6 +47,8 @@ class ProperPass(InductorPass):
    ],
 )
 def test_pass_manager_uuid(callable):
+    # Set the pass context as PassManager uuid uses it
+    with pass_context(Range(start=1, end=8)):
        # Some passes need dtype to be set
        config = VllmConfig(model_config=ModelConfig(dtype=torch.bfloat16))

--- a/vllm/compilation/inductor_pass.py
+++ b/vllm/compilation/inductor_pass.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from __future__ import annotations
 import functools
 import hashlib
 import inspect
@@ -8,15 +10,17 @@ import json
 import types
 from collections.abc import Callable
 from contextlib import contextmanager
-from typing import Any
+from typing import TYPE_CHECKING, Any
 import torch
 from torch import fx
 from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily
-from vllm.config.utils import Range
 from vllm.utils.torch_utils import is_torch_equal_or_newer
+if TYPE_CHECKING:
+    from vllm.config.utils import Range
 if is_torch_equal_or_newer("2.6"):
    from torch._inductor.custom_graph_pass import CustomGraphPass
 else:

--- a/vllm/compilation/piecewise_backend.py
+++ b/vllm/compilation/piecewise_backend.py
@@ -53,8 +53,27 @@ class PiecewiseBackend:
        self.is_last_graph = piecewise_compile_index == total_piecewise_compiles - 1
        self.is_full_graph = total_piecewise_compiles == 1
+        # TODO: we need to generalize encoder compilation to other models
+        self.is_encoder_compilation = vllm_backend.prefix in [
+            "Qwen2_5_VisionPatchEmbed",
+            "Qwen2_5_VisionPatchMerger",
+            "Qwen2_5_VisionBlock",
+        ]
        self.compile_ranges = self.compilation_config.get_compile_ranges()
+        if self.is_encoder_compilation:
+            # For encoder compilation, replace the upper bound of the last
+            # compile range with the max int32 value
+            max_int32 = 2**31 - 1
+            last_compile_range = self.compile_ranges[-1]
+            assert (
+                last_compile_range.end
+                == vllm_config.scheduler_config.max_num_batched_tokens
+            )
+            self.compile_ranges[-1] = Range(
+                start=last_compile_range.start, end=max_int32
+            )
        log_string = f"PiecewiseBackend: compile_ranges: {self.compile_ranges}"
        logger.debug_once(log_string)