Unverified commit afb4429b, authored by Cyrus Leung and committed by GitHub
Browse files

[CI/Build] Reorganize models tests (#17459)


Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent aa4502e7
...@@ -10,7 +10,7 @@ from transformers import Qwen2VLForConditionalGeneration ...@@ -10,7 +10,7 @@ from transformers import Qwen2VLForConditionalGeneration
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
from ....utils import large_gpu_test from ....utils import large_gpu_test
from ..utils import check_embeddings_close from ...utils import check_embeddings_close
HF_TEXT_PROMPTS = [ HF_TEXT_PROMPTS = [
# T -> X # T -> X
......
...@@ -8,7 +8,7 @@ from vllm.platforms import current_platform ...@@ -8,7 +8,7 @@ from vllm.platforms import current_platform
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
from ....utils import large_gpu_test from ....utils import large_gpu_test
from ..utils import check_embeddings_close from ...utils import check_embeddings_close
# Llava Next embedding implementation is only supported by CUDA. # Llava Next embedding implementation is only supported by CUDA.
# If run on ROCm, hf_model.model.resize_token_embeddings will # If run on ROCm, hf_model.model.resize_token_embeddings will
......
...@@ -9,7 +9,7 @@ from vllm.assets.image import VLM_IMAGES_DIR ...@@ -9,7 +9,7 @@ from vllm.assets.image import VLM_IMAGES_DIR
from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner from ....conftest import IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner
from ....utils import large_gpu_test from ....utils import large_gpu_test
from ..utils import check_embeddings_close from ...utils import check_embeddings_close
HF_TEXT_PROMPTS = [ HF_TEXT_PROMPTS = [
# T -> X # T -> X
......
...@@ -39,7 +39,6 @@ ground_truth_generations = [ ...@@ -39,7 +39,6 @@ ground_truth_generations = [
] ]
@pytest.mark.quant_model
@pytest.mark.skipif(not is_quant_method_supported("aqlm"), @pytest.mark.skipif(not is_quant_method_supported("aqlm"),
reason="AQLM is not supported on this GPU type.") reason="AQLM is not supported on this GPU type.")
@pytest.mark.parametrize("model", ["ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf"]) @pytest.mark.parametrize("model", ["ISTA-DASLab/Llama-2-7b-AQLM-2Bit-1x16-hf"])
......
...@@ -7,8 +7,8 @@ import torch ...@@ -7,8 +7,8 @@ import torch
from vllm.multimodal.image import rescale_image_size from vllm.multimodal.image import rescale_image_size
from ....conftest import IMAGE_ASSETS, VllmRunner, _ImageAssets from ...conftest import IMAGE_ASSETS, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close from ..utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -85,7 +85,6 @@ def run_awq_test( ...@@ -85,7 +85,6 @@ def run_awq_test(
) )
@pytest.mark.quant_model
@pytest.mark.parametrize( @pytest.mark.parametrize(
("source_model", "quant_model"), ("source_model", "quant_model"),
[("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")], [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")],
......
...@@ -15,7 +15,7 @@ from dataclasses import dataclass ...@@ -15,7 +15,7 @@ from dataclasses import dataclass
import pytest import pytest
from .utils import check_logprobs_close from ..utils import check_logprobs_close
@dataclass @dataclass
......
...@@ -4,20 +4,15 @@ ...@@ -4,20 +4,15 @@
"""Tests fp8 models against ground truth generation """Tests fp8 models against ground truth generation
Note: these tests will only pass on L4 GPU. Note: these tests will only pass on L4 GPU.
""" """
import os
from typing import Optional
import pytest import pytest
from tests.kernels.utils import override_backend_env_variable
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.utils import STR_BACKEND_ENV_VAR from vllm.utils import STR_BACKEND_ENV_VAR
from ...utils import check_logprobs_close from ..utils import check_logprobs_close
@pytest.mark.quant_model
@pytest.mark.skipif(not is_quant_method_supported("fp8"), @pytest.mark.skipif(not is_quant_method_supported("fp8"),
reason="fp8 is not supported on this GPU type.") reason="fp8 is not supported on this GPU type.")
@pytest.mark.parametrize( @pytest.mark.parametrize(
......
...@@ -14,9 +14,9 @@ from transformers import AutoTokenizer ...@@ -14,9 +14,9 @@ from transformers import AutoTokenizer
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from ....conftest import VllmRunner from ...conftest import VllmRunner
from ....utils import multi_gpu_test from ...utils import multi_gpu_test
from ...utils import check_logprobs_close from ..utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true" os.environ["TOKENIZERS_PARALLELISM"] = "true"
...@@ -38,7 +38,6 @@ LLAMA_CONFIG = GGUFTestConfig( ...@@ -38,7 +38,6 @@ LLAMA_CONFIG = GGUFTestConfig(
original_model="meta-llama/Llama-3.2-1B-Instruct", original_model="meta-llama/Llama-3.2-1B-Instruct",
gguf_repo="bartowski/Llama-3.2-1B-Instruct-GGUF", gguf_repo="bartowski/Llama-3.2-1B-Instruct-GGUF",
gguf_filename="Llama-3.2-1B-Instruct-IQ4_XS.gguf", gguf_filename="Llama-3.2-1B-Instruct-IQ4_XS.gguf",
marks=[pytest.mark.quant_model],
) )
QWEN2_CONFIG = GGUFTestConfig( QWEN2_CONFIG = GGUFTestConfig(
......
...@@ -15,7 +15,7 @@ from dataclasses import dataclass ...@@ -15,7 +15,7 @@ from dataclasses import dataclass
import pytest import pytest
from .utils import check_logprobs_close from ..utils import check_logprobs_close
@dataclass @dataclass
......
...@@ -16,7 +16,7 @@ import pytest ...@@ -16,7 +16,7 @@ import pytest
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from vllm.model_executor.layers.rotary_embedding import _ROPE_DICT from vllm.model_executor.layers.rotary_embedding import _ROPE_DICT
from ...utils import check_logprobs_close from ..utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true" os.environ["TOKENIZERS_PARALLELISM"] = "true"
...@@ -34,7 +34,6 @@ MODELS = [ ...@@ -34,7 +34,6 @@ MODELS = [
] ]
@pytest.mark.quant_model
@pytest.mark.flaky(reruns=3) @pytest.mark.flaky(reruns=3)
@pytest.mark.skipif(not is_quant_method_supported("gptq_marlin"), @pytest.mark.skipif(not is_quant_method_supported("gptq_marlin"),
reason="gptq_marlin is not supported on this GPU type.") reason="gptq_marlin is not supported on this GPU type.")
......
...@@ -13,7 +13,7 @@ import pytest ...@@ -13,7 +13,7 @@ import pytest
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from ...utils import check_logprobs_close from ..utils import check_logprobs_close
@dataclass @dataclass
...@@ -39,7 +39,6 @@ model_pairs = [ ...@@ -39,7 +39,6 @@ model_pairs = [
] ]
@pytest.mark.quant_model
@pytest.mark.flaky(reruns=2) @pytest.mark.flaky(reruns=2)
@pytest.mark.skipif(not is_quant_method_supported("gptq_marlin_24"), @pytest.mark.skipif(not is_quant_method_supported("gptq_marlin_24"),
reason="Marlin24 is not supported on this GPU type.") reason="Marlin24 is not supported on this GPU type.")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment