Commit ad58e9b3 authored by zhuwenwen
Browse files

Merge tag 'v0.6.1.post2' into v0.6.1.post2-dev

parents 408f663a 9ba0817f
......@@ -10,7 +10,7 @@ import pytest
from tests.kernels.utils import override_backend_env_variable
from tests.quantization.utils import is_quant_method_supported
from ..models.utils import check_logprobs_close
from ...utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true"
......
......@@ -11,7 +11,7 @@ from transformers import AutoTokenizer
from tests.quantization.utils import is_quant_method_supported
from .utils import check_logprobs_close
from ...utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true"
......
......@@ -15,7 +15,7 @@ import pytest
from tests.quantization.utils import is_quant_method_supported
from vllm.model_executor.layers.rotary_embedding import _ROPE_DICT
from .utils import check_logprobs_close
from ...utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true"
......
......@@ -10,9 +10,10 @@ from dataclasses import dataclass
import pytest
from tests.models.utils import check_logprobs_close
from tests.quantization.utils import is_quant_method_supported
from ...utils import check_logprobs_close
@dataclass
class ModelPair:
......
......@@ -6,7 +6,7 @@ import importlib.metadata
import pytest
from .utils import check_logprobs_close
from ...utils import check_logprobs_close
TRANSFORMERS_VERSION = tuple(
map(int,
......
import pytest
from tests.models.utils import check_outputs_equal
from vllm.worker.model_runner import _get_graph_batch_size
from ...utils import check_outputs_equal
MODELS = ["ai21labs/Jamba-tiny-random"]
......
......@@ -16,7 +16,7 @@ import pytest
from tests.quantization.utils import is_quant_method_supported
from .utils import check_logprobs_close
from ...utils import check_logprobs_close
@dataclass
......
......@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`.
"""
import pytest
from .utils import check_logprobs_close
from ...utils import check_logprobs_close
MODELS = [
"mistralai/Mistral-7B-Instruct-v0.1",
......
......@@ -7,7 +7,7 @@ Run `pytest tests/models/test_models.py`.
"""
import pytest
from .utils import check_outputs_equal
from ...utils import check_outputs_equal
MODELS = [
"facebook/opt-125m",
......
......@@ -7,7 +7,7 @@ import torch
from vllm.utils import is_cpu
from .utils import check_logprobs_close
from ...utils import check_logprobs_close
MODELS = [
"microsoft/Phi-3.5-MoE-instruct",
......
......@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, AutoTokenizer
from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from ..conftest import IMAGE_ASSETS
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import IMAGE_ASSETS
from ...utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -56,7 +54,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype: str, max_tokens: int, num_logprobs: int) -> None:
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalData objects and corresponding
MultiModalConfig as input.
......
"""Compare the outputs of HF and distributed vLLM when using greedy sampling.
Run:
```sh
pytest -s -v test_multimodal_broadcast.py
```
"""
import pytest
from vllm.utils import cuda_device_count_stateless
from ..utils import fork_new_process_for_each_test
from ....utils import multi_gpu_test
@pytest.mark.skipif(cuda_device_count_stateless() < 2,
reason="Need at least 2 GPUs to run the test.")
@pytest.mark.parametrize("model, distributed_executor_backend", [
("llava-hf/llava-1.5-7b-hf", "ray"),
("llava-hf/llava-v1.6-mistral-7b-hf", "ray"),
("facebook/chameleon-7b", "ray"),
("llava-hf/llava-1.5-7b-hf", "mp"),
("llava-hf/llava-v1.6-mistral-7b-hf", "mp"),
("facebook/chameleon-7b", "mp"),
@multi_gpu_test(num_gpus=2)
@pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
@pytest.mark.parametrize("model", [
"llava-hf/llava-1.5-7b-hf",
"llava-hf/llava-v1.6-mistral-7b-hf",
"facebook/chameleon-7b",
])
@fork_new_process_for_each_test
def test_models(hf_runner, vllm_runner, image_assets, model: str,
distributed_executor_backend: str) -> None:
def test_models(hf_runner, vllm_runner, image_assets,
distributed_executor_backend, model) -> None:
dtype = "half"
max_tokens = 5
......@@ -33,13 +19,11 @@ def test_models(hf_runner, vllm_runner, image_assets, model: str,
tensor_parallel_size = 2
if model.startswith("llava-hf/llava-1.5"):
from ..models.test_llava import models, run_test
from .test_llava import models, run_test
elif model.startswith("llava-hf/llava-v1.6"):
from ..models.test_llava_next import run_test # type: ignore[no-redef]
from ..models.test_llava_next import models
from .test_llava_next import models, run_test # type: ignore[no-redef]
elif model.startswith("facebook/chameleon"):
from ..models.test_chameleon import run_test # type: ignore[no-redef]
from ..models.test_chameleon import models
from .test_chameleon import models, run_test # type: ignore[no-redef]
else:
raise NotImplementedError(f"Unsupported model: {model}")
......
......@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, BatchEncoding
from vllm.multimodal.utils import rescale_image_size
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_outputs_equal
pytestmark = pytest.mark.vlm
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_outputs_equal
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -36,7 +34,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input.
......
......@@ -6,10 +6,8 @@ from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from vllm.utils import is_cpu
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -46,7 +44,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
......
......@@ -6,9 +6,7 @@ import torch.nn as nn
from huggingface_hub import snapshot_download
from transformers import AutoConfig, AutoModel, CLIPImageProcessor
from ..conftest import _ImageAssets, cleanup
pytestmark = pytest.mark.vlm
from ....conftest import _ImageAssets, cleanup
# we use snapshot_download to prevent conflicts between
# dynamic_module and trust_remote_code for hf_runner
......
......@@ -9,11 +9,9 @@ from transformers import AutoConfig
from vllm.multimodal.utils import rescale_image_size
from vllm.utils import is_cpu
from ..conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ...utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -78,7 +76,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
......@@ -331,6 +329,41 @@ def test_multi_images_models(hf_runner, vllm_runner, image_assets, model,
)
@pytest.mark.parametrize("model", ["OpenGVLab/InternVL2-2B"])
@pytest.mark.parametrize("size_factors", [[0.5, 1.0]])
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5])
@torch.inference_mode()
def test_different_num_patches(hf_runner, vllm_runner, image_assets, model,
                               size_factors, dtype: str, max_tokens: int,
                               num_logprobs: int) -> None:
    """Run ``run_test`` on fixed-size images rescaled by each size factor.

    Every image asset is first resized to 896x896. Two input sets are then
    built and each is passed to ``run_test`` in turn:

    * a batching set: one entry per (image, prompt) pair, holding the prompt
      repeated once per size factor and that image rescaled by each factor;
    * a multi-image set: a single entry holding the multi-image prompt
      repeated once per size factor and, for each factor, the list of all
      images rescaled by that factor.

    NOTE(review): judging by the test name, the varying scale factors are
    presumably meant to produce differing patch counts per image; that is
    not verifiable from this block alone.
    """
    resized_images = [
        asset.pil_image.resize((896, 896)) for asset in image_assets
    ]
    repeat_count = len(size_factors)

    # Case 1: each image paired with its own prompt, at every scale factor.
    batching_case = []
    for image, prompt in zip(resized_images, HF_IMAGE_PROMPTS):
        scaled_variants = [
            rescale_image_size(image, factor) for factor in size_factors
        ]
        batching_case.append(([prompt] * repeat_count, scaled_variants))

    # Case 2: all images together in one prompt, once per scale factor.
    images_per_factor = [
        [rescale_image_size(image, factor) for image in resized_images]
        for factor in size_factors
    ]
    multi_image_case = [
        ([HF_MULTIIMAGE_IMAGE_PROMPT] * repeat_count, images_per_factor),
    ]

    for inputs in (batching_case, multi_image_case):
        run_test(
            hf_runner,
            vllm_runner,
            inputs,
            model,
            dtype=dtype,
            max_tokens=max_tokens,
            num_logprobs=num_logprobs,
            mm_limit=2,
            tensor_parallel_size=1,
        )
@pytest.mark.parametrize(
"models", [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")])
@pytest.mark.parametrize(
......
......@@ -8,11 +8,9 @@ from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ..conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets)
from ...utils import check_logprobs_close
_LIMIT_IMAGE_PER_PROMPT = 4
......@@ -143,7 +141,7 @@ def _run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input.
......@@ -239,7 +237,7 @@ def _run_test(
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5])
def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype: str, max_tokens: int, num_logprobs: int) -> None:
dtype, max_tokens, num_logprobs) -> None:
run_test(
hf_runner,
vllm_runner,
......
......@@ -5,10 +5,8 @@ from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
from vllm.sequence import SampleLogprobs
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm
from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from ...utils import check_logprobs_close
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign":
......@@ -62,7 +60,7 @@ def run_test(
):
"""Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images.
All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment