[tests] fix tests

04629132 · zhuwenwen · 07c69390 · 04629132 · 04629132 · 04629132
Commit 04629132 authored Jun 12, 2025 by zhuwenwen
12 changed files
--- a/tests/models/multimodal/processing/test_llava_onevision.py
+++ b/tests/models/multimodal/processing/test_llava_onevision.py
@@ -3,6 +3,7 @@
 import itertools
 from functools import partial
+import os
 import pytest
 from PIL import Image
 from pqdm.threads import pqdm
@@ -12,6 +13,7 @@ from vllm.multimodal.parse import ImageSize
 from vllm.multimodal.processing import BaseMultiModalProcessor
 from ...utils import build_model_context
+from ....utils import models_path_prefix
 def _validate_image_max_tokens_one(
@@ -33,7 +35,7 @@ def _validate_image_max_tokens_one(
 @pytest.mark.skip("This test takes around 5 minutes to run. "
                  "Comment this out to run it manually.")
 @pytest.mark.parametrize("model_id",
-                         ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
+                         [os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")])
 def test_processor_max_tokens(model_id):
    ctx = build_model_context(
        model_id,
@@ -127,7 +129,7 @@ def _test_image_prompt_replacements(
 @pytest.mark.parametrize("model_id",
-                         ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
+                         [os.path.join(models_path_prefix, "llava-hf/llava-onevision-qwen2-0.5b-ov-hf")])
 @pytest.mark.parametrize("num_imgs", [1, 2])
 def test_processor_prompt_replacements_regression(model_id, num_imgs):
    ctx = build_model_context(
@@ -180,4 +182,4 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
        processor,
        num_imgs=num_imgs,
        image_sizes=image_sizes,
    )
\ No newline at end of file
--- a/tests/models/multimodal/processing/test_mllama.py
+++ b/tests/models/multimodal/processing/test_mllama.py
 # SPDX-License-Identifier: Apache-2.0
 """Tests for mllama's multimodal preprocessing and profiling."""
+import os
 import pytest
 from transformers import MllamaConfig
@@ -7,10 +8,11 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.profiling import MultiModalProfiler
 from ...utils import build_model_context
+from ....utils import models_path_prefix
 @pytest.mark.parametrize("model_id",
-                         ["meta-llama/Llama-3.2-11B-Vision-Instruct"])
+                         [os.path.join(models_path_prefix, "meta-llama/Llama-3.2-11B-Vision-Instruct")])
 @pytest.mark.parametrize("max_model_len", [4096, 8192, 25600, 131072])
 @pytest.mark.parametrize("max_num_seqs", [1, 2, 8])
 def test_profiling(
@@ -68,4 +70,4 @@ def test_profiling(
    # simulate mllama image-present prefill.
    for actual_len, last_group_len in zip(actual_encoder_seq_lens,
                                          encoder_seq_lens):
        assert actual_len >= last_group_len
\ No newline at end of file
--- a/tests/models/multimodal/processing/test_phi3v.py
+++ b/tests/models/multimodal/processing/test_phi3v.py
 # SPDX-License-Identifier: Apache-2.0
 """Tests for phi3v's multimodal preprocessing kwargs."""
+import os
 import pytest
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
+from ....utils import models_path_prefix
-@pytest.mark.parametrize("model_id", ["microsoft/Phi-3.5-vision-instruct"])
+@pytest.mark.parametrize("model_id", [os.path.join(models_path_prefix, "microsoft/Phi-3.5-vision-instruct")])
 # yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img"),
@@ -50,4 +52,4 @@ def test_processor_override(
    # Ensure we have the right number of placeholders per num_crops size
    img_tok_count = processed_inputs["prompt_token_ids"].count(_IMAGE_TOKEN_ID)
    assert img_tok_count == expected_toks_per_img * num_imgs
\ No newline at end of file
--- a/tests/models/multimodal/processing/test_phi4mm.py
+++ b/tests/models/multimodal/processing/test_phi4mm.py
 # SPDX-License-Identifier: Apache-2.0
 """Tests for phi4mm's multimodal preprocessing kwargs."""
+import os
 import pytest
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
+from ....utils import models_path_prefix
-@pytest.mark.parametrize("model_id", ["microsoft/Phi-4-multimodal-instruct"])
+@pytest.mark.parametrize("model_id", [os.path.join(models_path_prefix, "microsoft/Phi-4-multimodal-instruct")])
 # yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img"),
@@ -56,4 +58,4 @@ def test_processor_override(
    # Ensure we have the right number of placeholders per num_crops size
    img_tok_count = processed_inputs["prompt_token_ids"].count(
        _IMAGE_PLACEHOLDER_TOKEN_ID)
    assert img_tok_count == expected_toks_per_img * num_imgs
\ No newline at end of file
--- a/tests/models/multimodal/processing/test_qwen2_vl.py
+++ b/tests/models/multimodal/processing/test_qwen2_vl.py
 # SPDX-License-Identifier: Apache-2.0
+import os
 import pytest
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
+from ....utils import models_path_prefix
-@pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])
+@pytest.mark.parametrize("model_id", [os.path.join(models_path_prefix, "Qwen/Qwen2-VL-2B-Instruct")])
 # yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img", "expected_pixels_shape"), [
@@ -51,4 +53,4 @@ def test_processor_override(
    assert img_tok_count == expected_toks_per_img * num_imgs
    assert pixel_shape[0] == expected_pixels_shape[0] * num_imgs
    assert pixel_shape[1] == expected_pixels_shape[1]
\ No newline at end of file
--- a/tests/models/multimodal/processing/test_smolvlm.py
+++ b/tests/models/multimodal/processing/test_smolvlm.py
 # SPDX-License-Identifier: Apache-2.0
 """Tests for smolvlm's multimodal preprocessing kwargs."""
+import os
 import pytest
 from transformers import SmolVLMConfig
@@ -7,9 +8,10 @@ from vllm.multimodal import MULTIMODAL_REGISTRY
 from ....conftest import _ImageAssets
 from ...utils import build_model_context
+from ....utils import models_path_prefix
-@pytest.mark.parametrize("model_id", ["HuggingFaceTB/SmolVLM2-2.2B-Instruct"])
+@pytest.mark.parametrize("model_id", [os.path.join(models_path_prefix, "HuggingFaceTB/SmolVLM2-2.2B-Instruct")])
 # yapf: disable
 @pytest.mark.parametrize(
    ("mm_processor_kwargs", "expected_toks_per_img"),
@@ -62,4 +64,4 @@ def test_processor_override(
    # Ensure we have the right number of placeholders per num_crops size
    image_token_id = ctx.get_hf_config().image_token_id
    img_tok_count = processed_inputs["prompt_token_ids"].count(image_token_id)
    assert img_tok_count == expected_toks_per_img * num_imgs
\ No newline at end of file
--- a/tests/models/registry.py
+++ b/tests/models/registry.py
@@ -8,7 +8,9 @@ import os
 import pytest
 from packaging.version import Version
 from transformers import __version__ as TRANSFORMERS_VERSION
+# from ..utils import models_path_prefix
+models_path_prefix = os.getenv('VLLM_OPTEST_MODELS_PATH') or os.getenv("OPTEST_MODELS_PATH")
 @dataclass(frozen=True)
@@ -109,8 +111,6 @@ class _HfExamplesInfo:
                pytest.skip(msg)
-models_path_prefix = os.getenv('VLLM_OPTEST_MODELS_PATH') or os.getenv("OPTEST_MODELS_PATH")
 # yapf: disable
 _TEXT_GENERATION_EXAMPLE_MODELS = {
    # [Decoder-only]

--- a/tests/models/test_registry.py
+++ b/tests/models/test_registry.py
@@ -2,6 +2,7 @@
 import warnings
+import os
 import pytest
 import torch.cuda
@@ -20,6 +21,8 @@ from vllm.platforms import current_platform
 from ..utils import create_new_process_for_each_test
 from .registry import HF_EXAMPLE_MODELS
+models_path_prefix = os.getenv('VLLM_OPTEST_MODELS_PATH') or os.getenv("OPTEST_MODELS_PATH")
 @pytest.mark.parametrize("model_arch", ModelRegistry.get_supported_archs())
 def test_registry_imports(model_arch):
@@ -52,12 +55,12 @@ def test_registry_imports(model_arch):
 @create_new_process_for_each_test()
 @pytest.mark.parametrize("model_arch,is_mm,init_cuda,is_ce", [
-    ("LlamaForCausalLM", False, False, False),
+    (os.path.join(models_path_prefix, "LlamaForCausalLM"), False, False, False),
-    ("MllamaForConditionalGeneration", True, False, False),
+    (os.path.join(models_path_prefix, "MllamaForConditionalGeneration"), True, False, False),
-    ("LlavaForConditionalGeneration", True, True, False),
+    (os.path.join(models_path_prefix, "LlavaForConditionalGeneration"), True, True, False),
-    ("BertForSequenceClassification", False, False, True),
+    (os.path.join(models_path_prefix, "BertForSequenceClassification"), False, False, True),
-    ("RobertaForSequenceClassification", False, False, True),
+    (os.path.join(models_path_prefix, "RobertaForSequenceClassification"), False, False, True),
-    ("XLMRobertaForSequenceClassification", False, False, True),
+    (os.path.join(models_path_prefix, "XLMRobertaForSequenceClassification"), False, False, True),
 ])
 def test_registry_model_property(model_arch, is_mm, init_cuda, is_ce):
    assert ModelRegistry.is_multimodal_model(model_arch) is is_mm
@@ -77,9 +80,9 @@ def test_registry_model_property(model_arch, is_mm, init_cuda, is_ce):
 @create_new_process_for_each_test()
 @pytest.mark.parametrize("model_arch,is_pp,init_cuda", [
-    ("MLPSpeculatorPreTrainedModel", False, False),
+    (os.path.join(models_path_prefix, "MLPSpeculatorPreTrainedModel"), False, False),
-    ("DeepseekV2ForCausalLM", True, False),
+    (os.path.join(models_path_prefix, "DeepseekV2ForCausalLM"), True, False),
-    ("Qwen2VLForConditionalGeneration", True, True),
+    (os.path.join(models_path_prefix, "Qwen2VLForConditionalGeneration"), True, True),
 ])
 def test_registry_is_pp(model_arch, is_pp, init_cuda):
    assert ModelRegistry.is_pp_supported_model(model_arch) is is_pp
@@ -104,4 +107,4 @@ def test_hf_registry_coverage():
    assert not untested_archs, (
        "Please add the following architectures to "
        f"`tests/models/registry.py`: {untested_archs}")
\ No newline at end of file
--- a/tests/quantization/test_cpu_offload.py
+++ b/tests/quantization/test_cpu_offload.py
 # SPDX-License-Identifier: Apache-2.0
 # Expanded quantized model tests for CPU offloading
 # Base tests: tests/basic_correctness/test_cpu_offload.py
 import pytest
 import os
 from tests.quantization.utils import is_quant_method_supported
 from ..utils import compare_two_settings, models_path_prefix
 from vllm.platforms import current_platform
 @pytest.mark.skipif(not is_quant_method_supported("fp8") or current_platform.is_rocm(),
                    reason="fp8 is not supported on this GPU type.")
 def test_cpu_offload_fp8():
    """Compare on-the-fly fp8 quantization with and without CPU offloading.

    The test is skipped when fp8 is reported as unsupported or when the
    current platform is ROCm.
    """
    fp8_flags = ("--quantization", "fp8")
    offload_flags = ("--cpu-offload-gb", "1")

    # An unquantized checkpoint is quantized to fp8 at load time; the second
    # argument list additionally enables 1 GB of CPU offload.
    compare_two_settings(
        os.path.join(models_path_prefix, "meta-llama/Llama-3.2-1B-Instruct"),
        list(fp8_flags),
        [*fp8_flags, *offload_flags],
        max_wait_seconds=480)

    # The comparison against an already-quantized checkpoint is disabled:
    # compare_two_settings(os.path.join(models_path_prefix, "neuralmagic/Qwen2-1.5B-Instruct-FP8"), [],
    #                      ["--cpu-offload-gb", "1"],
    #                      max_wait_seconds=480)
 @pytest.mark.skipif(not is_quant_method_supported("gptq_marlin") or current_platform.is_rocm(),
                    reason="gptq_marlin is not supported on this GPU type.")
 def test_cpu_offload_gptq(monkeypatch):
    """Compare a GPTQ checkpoint with and without CPU offloading.

    Two comparisons run against the same checkpoint: one with no explicit
    quantization flag (the original comment labels this "GPTQ Marlin") and
    one with ``--quantization gptq`` passed explicitly.
    """
    # Dummy weights upset this quantization method, so real weights are forced.
    monkeypatch.setenv('VLLM_TEST_FORCE_LOAD_FORMAT', 'auto')

    checkpoint = os.path.join(models_path_prefix, "Qwen/Qwen2-1.5B-Instruct-GPTQ-Int4")
    offload_flags = ("--cpu-offload-gb", "1")
    gptq_flags = ("--quantization", "gptq")

    # GPTQ Marlin: no quantization flag on either side.
    compare_two_settings(checkpoint, [], list(offload_flags),
                         max_wait_seconds=480)

    # Plain GPTQ: quantization method requested explicitly on both sides.
    compare_two_settings(checkpoint,
                         list(gptq_flags),
                         [*gptq_flags, *offload_flags],
                         max_wait_seconds=480)
 @pytest.mark.skipif(not is_quant_method_supported("awq_marlin") or current_platform.is_rocm(),
                    reason="awq_marlin is not supported on this GPU type.")
 def test_cpu_offload_awq(monkeypatch):
    """Compare an AWQ checkpoint with and without CPU offloading.

    Two comparisons run against the same checkpoint: one with no explicit
    quantization flag (the original comment labels this "AWQ Marlin") and
    one with ``--quantization awq`` passed explicitly.
    """
    # Dummy weights upset this quantization method, so real weights are forced.
    monkeypatch.setenv('VLLM_TEST_FORCE_LOAD_FORMAT', 'auto')

    checkpoint = os.path.join(models_path_prefix, "Qwen/Qwen2-1.5B-Instruct-AWQ")
    offload_flags = ("--cpu-offload-gb", "1")
    awq_flags = ("--quantization", "awq")

    # AWQ Marlin: no quantization flag on either side.
    compare_two_settings(checkpoint, [], list(offload_flags),
                         max_wait_seconds=480)

    # Plain AWQ: quantization method requested explicitly on both sides.
    compare_two_settings(checkpoint,
                         list(awq_flags),
                         [*awq_flags, *offload_flags],
                         max_wait_seconds=480)
 @pytest.mark.skipif(not is_quant_method_supported("gptq_marlin") or current_platform.is_rocm(),
                    reason="gptq_marlin is not supported on this GPU type.")
 def test_cpu_offload_compressed_tensors(monkeypatch):
    """Compare compressed-tensors checkpoints with and without CPU offloading.

    Each checkpoint is compared with an empty argument list against
    ``--cpu-offload-gb 1``.
    """
    # NOTE(review): the skip gate checks gptq_marlin support; confirm that is
    # the intended gate for these compressed-tensors checkpoints.

    # Dummy weights upset this quantization method, so real weights are forced.
    monkeypatch.setenv('VLLM_TEST_FORCE_LOAD_FORMAT', 'auto')

    offload_flags = ("--cpu-offload-gb", "1")
    checkpoints = (
        "nm-testing/tinyllama-oneshot-w4a16-channel-v2",  # wNa16
        "nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t",  # w4a16_marlin24
        "nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change",  # w8a8
    )

    # Checkpoints run in the listed order; a failure stops the remaining ones.
    for checkpoint in checkpoints:
        compare_two_settings(os.path.join(models_path_prefix, checkpoint), [],
                             list(offload_flags),
                             max_wait_seconds=480)
--- a/tests/test_embedded_commit.py
+++ b/tests/test_embedded_commit.py
 # SPDX-License-Identifier: Apache-2.0
 import vllm
 def test_embedded_commit_defined():
    """Check that the installed ``vllm`` package carries real version metadata.

    Both version attributes must exist, and neither may hold the "dev"
    placeholder value.
    """
    # Existence is checked first so a missing attribute fails as an assertion
    # rather than an AttributeError in the comparisons below.
    for attribute in ("__version__", "__version_tuple__"):
        assert hasattr(vllm, attribute)

    assert vllm.__version__ != "dev"
    assert vllm.__version_tuple__ != (0, 0, "dev")
--- a/tests/test_seed_behavior.py
+++ b/tests/test_seed_behavior.py
 # SPDX-License-Identifier: Apache-2.0
 import random
 import numpy as np
 import torch
 from vllm.platforms.interface import Platform
 def test_seed_behavior():
    """Check that ``Platform.seed_everything`` makes random draws repeatable.

    After seeding with the same value twice, the first draw from ``random``,
    ``numpy.random`` and ``torch`` must match between the two runs.
    """

    def draw_after_seeding(seed):
        # Seed, then take one sample per generator. Tuple elements are
        # evaluated left to right: random, numpy, torch.
        Platform.seed_everything(seed)
        return (
            random.randint(0, 100),
            np.random.randint(0, 100),
            torch.randint(0, 100, (1, )).item(),
        )

    first_py, first_np, first_torch = draw_after_seeding(42)
    second_py, second_np, second_torch = draw_after_seeding(42)

    assert first_py == second_py
    assert first_np == second_np
    assert first_torch == second_torch
--- a/tests/weight_loading/run_model_weight_loading_test.sh
+++ b/tests/weight_loading/run_model_weight_loading_test.sh