Unverified commit 0b6a8a30, authored by Ilya Markov, committed by GitHub
Browse files

[BugFix] Fix non detected failing tests (#30277)


Signed-off-by: ilmarkov <markovilya197@gmail.com>
parent 804e3468
...@@ -468,7 +468,9 @@ steps: ...@@ -468,7 +468,9 @@ steps:
# tests covered elsewhere. # tests covered elsewhere.
# Use `find` to launch multiple instances of pytest so that # Use `find` to launch multiple instances of pytest so that
# they do not suffer from https://github.com/vllm-project/vllm/issues/28965 # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
- "find compile/ -maxdepth 1 -name 'test_*.py' -exec pytest -s -v {} \\\\;" # However, find does not normally propagate error codes, so we combine it with xargs
# (using -0 for proper path handling)
- "find compile/ -maxdepth 1 -name 'test_*.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
- label: PyTorch Fullgraph Smoke Test # 15min - label: PyTorch Fullgraph Smoke Test # 15min
timeout_in_minutes: 30 timeout_in_minutes: 30
...@@ -482,7 +484,9 @@ steps: ...@@ -482,7 +484,9 @@ steps:
# as it is a heavy test that is covered in other steps. # as it is a heavy test that is covered in other steps.
# Use `find` to launch multiple instances of pytest so that # Use `find` to launch multiple instances of pytest so that
# they do not suffer from https://github.com/vllm-project/vllm/issues/28965 # they do not suffer from https://github.com/vllm-project/vllm/issues/28965
- "find compile/fullgraph/ -name 'test_*.py' -not -name 'test_full_graph.py' -exec pytest -s -v {} \\\\;" # However, find does not normally propagate error codes, so we combine it with xargs
# (using -0 for proper path handling)
- "find compile/fullgraph -maxdepth 1 -name 'test_*.py' -not -name 'test_full_graph.py' -print0 | xargs -0 -n1 -I{} pytest -s -v '{}'"
- label: PyTorch Fullgraph Test # 27min - label: PyTorch Fullgraph Test # 27min
timeout_in_minutes: 40 timeout_in_minutes: 40
......
...@@ -17,7 +17,6 @@ def test_compile(): ...@@ -17,7 +17,6 @@ def test_compile():
# forked needed to workaround https://github.com/vllm-project/vllm/issues/21073 # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073
@pytest.mark.forked @pytest.mark.forked
@pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda") @pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda")
@pytest.mark.xfail
def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch): def test_qwen2_5_vl_compilation(vllm_runner, monkeypatch):
"""Test that Qwen2.5-VL vision submodules are compiled. """Test that Qwen2.5-VL vision submodules are compiled.
......
...@@ -80,6 +80,8 @@ def test_compile_ranges(use_fresh_inductor_cache): ...@@ -80,6 +80,8 @@ def test_compile_ranges(use_fresh_inductor_cache):
vllm_config = VllmConfig( vllm_config = VllmConfig(
scheduler_config=SchedulerConfig( scheduler_config=SchedulerConfig(
max_num_batched_tokens=8192, max_num_batched_tokens=8192,
max_model_len=8192,
is_encoder_decoder=False,
), ),
compilation_config=CompilationConfig( compilation_config=CompilationConfig(
mode=CompilationMode.VLLM_COMPILE, mode=CompilationMode.VLLM_COMPILE,
...@@ -112,6 +114,8 @@ def test_compile_config_get_compile_ranges(): ...@@ -112,6 +114,8 @@ def test_compile_config_get_compile_ranges():
VllmConfig( VllmConfig(
scheduler_config=SchedulerConfig( scheduler_config=SchedulerConfig(
max_num_batched_tokens=8192, max_num_batched_tokens=8192,
max_model_len=8192,
is_encoder_decoder=False,
), ),
compilation_config=compilation_config, compilation_config=compilation_config,
) )
...@@ -134,6 +138,8 @@ def test_inductor_cache_compile_ranges(monkeypatch, use_fresh_inductor_cache): ...@@ -134,6 +138,8 @@ def test_inductor_cache_compile_ranges(monkeypatch, use_fresh_inductor_cache):
) )
scheduler_config = SchedulerConfig( scheduler_config = SchedulerConfig(
max_num_batched_tokens=8192, max_num_batched_tokens=8192,
max_model_len=8192,
is_encoder_decoder=False,
) )
torch.set_default_device("cuda") torch.set_default_device("cuda")
......
...@@ -5,9 +5,14 @@ import copy ...@@ -5,9 +5,14 @@ import copy
import pytest import pytest
import torch import torch
from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass from vllm.compilation.inductor_pass import (
CallableInductorPass,
InductorPass,
pass_context,
)
from vllm.compilation.pass_manager import PostGradPassManager from vllm.compilation.pass_manager import PostGradPassManager
from vllm.config import ModelConfig, VllmConfig from vllm.config import ModelConfig, VllmConfig
from vllm.config.utils import Range
# dummy custom pass that doesn't inherit # dummy custom pass that doesn't inherit
...@@ -42,6 +47,8 @@ class ProperPass(InductorPass): ...@@ -42,6 +47,8 @@ class ProperPass(InductorPass):
], ],
) )
def test_pass_manager_uuid(callable): def test_pass_manager_uuid(callable):
# Set the pass context as PassManager uuid uses it
with pass_context(Range(start=1, end=8)):
# Some passes need dtype to be set # Some passes need dtype to be set
config = VllmConfig(model_config=ModelConfig(dtype=torch.bfloat16)) config = VllmConfig(model_config=ModelConfig(dtype=torch.bfloat16))
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from __future__ import annotations
import functools import functools
import hashlib import hashlib
import inspect import inspect
...@@ -8,15 +10,17 @@ import json ...@@ -8,15 +10,17 @@ import json
import types import types
from collections.abc import Callable from collections.abc import Callable
from contextlib import contextmanager from contextlib import contextmanager
from typing import Any from typing import TYPE_CHECKING, Any
import torch import torch
from torch import fx from torch import fx
from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily from torch._subclasses.fake_tensor import FakeTensorMode, unset_fake_temporarily
from vllm.config.utils import Range
from vllm.utils.torch_utils import is_torch_equal_or_newer from vllm.utils.torch_utils import is_torch_equal_or_newer
if TYPE_CHECKING:
from vllm.config.utils import Range
if is_torch_equal_or_newer("2.6"): if is_torch_equal_or_newer("2.6"):
from torch._inductor.custom_graph_pass import CustomGraphPass from torch._inductor.custom_graph_pass import CustomGraphPass
else: else:
......
...@@ -53,8 +53,27 @@ class PiecewiseBackend: ...@@ -53,8 +53,27 @@ class PiecewiseBackend:
self.is_last_graph = piecewise_compile_index == total_piecewise_compiles - 1 self.is_last_graph = piecewise_compile_index == total_piecewise_compiles - 1
self.is_full_graph = total_piecewise_compiles == 1 self.is_full_graph = total_piecewise_compiles == 1
# TODO: we need to generalize encoder compilation to other models
self.is_encoder_compilation = vllm_backend.prefix in [
"Qwen2_5_VisionPatchEmbed",
"Qwen2_5_VisionPatchMerger",
"Qwen2_5_VisionBlock",
]
self.compile_ranges = self.compilation_config.get_compile_ranges() self.compile_ranges = self.compilation_config.get_compile_ranges()
if self.is_encoder_compilation:
# For encoder compilation we use the max int32 value
# to set the upper bound of the compile ranges
max_int32 = 2**31 - 1
last_compile_range = self.compile_ranges[-1]
assert (
last_compile_range.end
== vllm_config.scheduler_config.max_num_batched_tokens
)
self.compile_ranges[-1] = Range(
start=last_compile_range.start, end=max_int32
)
log_string = f"PiecewiseBackend: compile_ranges: {self.compile_ranges}" log_string = f"PiecewiseBackend: compile_ranges: {self.compile_ranges}"
logger.debug_once(log_string) logger.debug_once(log_string)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment