Commit ad58e9b3 authored by zhuwenwen
Browse files

Merge tag 'v0.6.1.post2' into v0.6.1.post2-dev

parents 408f663a 9ba0817f
...@@ -10,7 +10,7 @@ import pytest ...@@ -10,7 +10,7 @@ import pytest
from tests.kernels.utils import override_backend_env_variable from tests.kernels.utils import override_backend_env_variable
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from ..models.utils import check_logprobs_close from ...utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true" os.environ["TOKENIZERS_PARALLELISM"] = "true"
......
...@@ -11,7 +11,7 @@ from transformers import AutoTokenizer ...@@ -11,7 +11,7 @@ from transformers import AutoTokenizer
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from .utils import check_logprobs_close from ...utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true" os.environ["TOKENIZERS_PARALLELISM"] = "true"
......
...@@ -15,7 +15,7 @@ import pytest ...@@ -15,7 +15,7 @@ import pytest
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from vllm.model_executor.layers.rotary_embedding import _ROPE_DICT from vllm.model_executor.layers.rotary_embedding import _ROPE_DICT
from .utils import check_logprobs_close from ...utils import check_logprobs_close
os.environ["TOKENIZERS_PARALLELISM"] = "true" os.environ["TOKENIZERS_PARALLELISM"] = "true"
......
...@@ -10,9 +10,10 @@ from dataclasses import dataclass ...@@ -10,9 +10,10 @@ from dataclasses import dataclass
import pytest import pytest
from tests.models.utils import check_logprobs_close
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from ...utils import check_logprobs_close
@dataclass @dataclass
class ModelPair: class ModelPair:
......
...@@ -6,7 +6,7 @@ import importlib.metadata ...@@ -6,7 +6,7 @@ import importlib.metadata
import pytest import pytest
from .utils import check_logprobs_close from ...utils import check_logprobs_close
TRANSFORMERS_VERSION = tuple( TRANSFORMERS_VERSION = tuple(
map(int, map(int,
......
import pytest import pytest
from tests.models.utils import check_outputs_equal
from vllm.worker.model_runner import _get_graph_batch_size from vllm.worker.model_runner import _get_graph_batch_size
from ...utils import check_outputs_equal
MODELS = ["ai21labs/Jamba-tiny-random"] MODELS = ["ai21labs/Jamba-tiny-random"]
......
...@@ -16,7 +16,7 @@ import pytest ...@@ -16,7 +16,7 @@ import pytest
from tests.quantization.utils import is_quant_method_supported from tests.quantization.utils import is_quant_method_supported
from .utils import check_logprobs_close from ...utils import check_logprobs_close
@dataclass @dataclass
......
...@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`. ...@@ -4,7 +4,7 @@ Run `pytest tests/models/test_mistral.py`.
""" """
import pytest import pytest
from .utils import check_logprobs_close from ...utils import check_logprobs_close
MODELS = [ MODELS = [
"mistralai/Mistral-7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.1",
......
...@@ -7,7 +7,7 @@ Run `pytest tests/models/test_models.py`. ...@@ -7,7 +7,7 @@ Run `pytest tests/models/test_models.py`.
""" """
import pytest import pytest
from .utils import check_outputs_equal from ...utils import check_outputs_equal
MODELS = [ MODELS = [
"facebook/opt-125m", "facebook/opt-125m",
......
...@@ -7,7 +7,7 @@ import torch ...@@ -7,7 +7,7 @@ import torch
from vllm.utils import is_cpu from vllm.utils import is_cpu
from .utils import check_logprobs_close from ...utils import check_logprobs_close
MODELS = [ MODELS = [
"microsoft/Phi-3.5-MoE-instruct", "microsoft/Phi-3.5-MoE-instruct",
......
...@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, AutoTokenizer ...@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, AutoTokenizer
from vllm.multimodal.utils import rescale_image_size from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs from vllm.sequence import SampleLogprobs
from ..conftest import IMAGE_ASSETS from ....conftest import IMAGE_ASSETS
from .utils import check_logprobs_close from ...utils import check_logprobs_close
pytestmark = pytest.mark.vlm
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -56,7 +54,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors, ...@@ -56,7 +54,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype: str, max_tokens: int, num_logprobs: int) -> None: dtype: str, max_tokens: int, num_logprobs: int) -> None:
"""Inference result should be the same between hf and vllm. """Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images. All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input. For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalData objects and corresponding For vllm runner, we provide MultiModalData objects and corresponding
MultiModalConfig as input. MultiModalConfig as input.
......
"""Compare the outputs of HF and distributed vLLM when using greedy sampling.
Run:
```sh
pytest -s -v test_multimodal_broadcast.py
```
"""
import pytest import pytest
from vllm.utils import cuda_device_count_stateless from ....utils import multi_gpu_test
from ..utils import fork_new_process_for_each_test
@pytest.mark.skipif(cuda_device_count_stateless() < 2, @multi_gpu_test(num_gpus=2)
reason="Need at least 2 GPUs to run the test.") @pytest.mark.parametrize("distributed_executor_backend", ["ray", "mp"])
@pytest.mark.parametrize("model, distributed_executor_backend", [ @pytest.mark.parametrize("model", [
("llava-hf/llava-1.5-7b-hf", "ray"), "llava-hf/llava-1.5-7b-hf",
("llava-hf/llava-v1.6-mistral-7b-hf", "ray"), "llava-hf/llava-v1.6-mistral-7b-hf",
("facebook/chameleon-7b", "ray"), "facebook/chameleon-7b",
("llava-hf/llava-1.5-7b-hf", "mp"),
("llava-hf/llava-v1.6-mistral-7b-hf", "mp"),
("facebook/chameleon-7b", "mp"),
]) ])
@fork_new_process_for_each_test def test_models(hf_runner, vllm_runner, image_assets,
def test_models(hf_runner, vllm_runner, image_assets, model: str, distributed_executor_backend, model) -> None:
distributed_executor_backend: str) -> None:
dtype = "half" dtype = "half"
max_tokens = 5 max_tokens = 5
...@@ -33,13 +19,11 @@ def test_models(hf_runner, vllm_runner, image_assets, model: str, ...@@ -33,13 +19,11 @@ def test_models(hf_runner, vllm_runner, image_assets, model: str,
tensor_parallel_size = 2 tensor_parallel_size = 2
if model.startswith("llava-hf/llava-1.5"): if model.startswith("llava-hf/llava-1.5"):
from ..models.test_llava import models, run_test from .test_llava import models, run_test
elif model.startswith("llava-hf/llava-v1.6"): elif model.startswith("llava-hf/llava-v1.6"):
from ..models.test_llava_next import run_test # type: ignore[no-redef] from .test_llava_next import models, run_test # type: ignore[no-redef]
from ..models.test_llava_next import models
elif model.startswith("facebook/chameleon"): elif model.startswith("facebook/chameleon"):
from ..models.test_chameleon import run_test # type: ignore[no-redef] from .test_chameleon import models, run_test # type: ignore[no-redef]
from ..models.test_chameleon import models
else: else:
raise NotImplementedError(f"Unsupported model: {model}") raise NotImplementedError(f"Unsupported model: {model}")
......
...@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, BatchEncoding ...@@ -6,10 +6,8 @@ from transformers import AutoModelForVision2Seq, BatchEncoding
from vllm.multimodal.utils import rescale_image_size from vllm.multimodal.utils import rescale_image_size
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_outputs_equal from ...utils import check_outputs_equal
pytestmark = pytest.mark.vlm
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -36,7 +34,7 @@ def run_test( ...@@ -36,7 +34,7 @@ def run_test(
): ):
"""Inference result should be the same between hf and vllm. """Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images. All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input. For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input. and corresponding vision language config as input.
......
...@@ -6,10 +6,8 @@ from vllm.multimodal.utils import rescale_image_size ...@@ -6,10 +6,8 @@ from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs from vllm.sequence import SampleLogprobs
from vllm.utils import is_cpu from vllm.utils import is_cpu
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_logprobs_close from ...utils import check_logprobs_close
pytestmark = pytest.mark.vlm
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -46,7 +44,7 @@ def run_test( ...@@ -46,7 +44,7 @@ def run_test(
): ):
"""Inference result should be the same between hf and vllm. """Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images. All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input. For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input. and corresponding MultiModalConfig as input.
......
...@@ -6,9 +6,7 @@ import torch.nn as nn ...@@ -6,9 +6,7 @@ import torch.nn as nn
from huggingface_hub import snapshot_download from huggingface_hub import snapshot_download
from transformers import AutoConfig, AutoModel, CLIPImageProcessor from transformers import AutoConfig, AutoModel, CLIPImageProcessor
from ..conftest import _ImageAssets, cleanup from ....conftest import _ImageAssets, cleanup
pytestmark = pytest.mark.vlm
# we use snapshot_download to prevent conflicts between # we use snapshot_download to prevent conflicts between
# dynamic_module and trust_remote_code for hf_runner # dynamic_module and trust_remote_code for hf_runner
......
...@@ -9,11 +9,9 @@ from transformers import AutoConfig ...@@ -9,11 +9,9 @@ from transformers import AutoConfig
from vllm.multimodal.utils import rescale_image_size from vllm.multimodal.utils import rescale_image_size
from vllm.utils import is_cpu from vllm.utils import is_cpu
from ..conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner, from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets) _ImageAssets)
from .utils import check_logprobs_close from ...utils import check_logprobs_close
pytestmark = pytest.mark.vlm
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -78,7 +76,7 @@ def run_test( ...@@ -78,7 +76,7 @@ def run_test(
): ):
"""Inference result should be the same between hf and vllm. """Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images. All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input. For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input. and corresponding MultiModalConfig as input.
...@@ -331,6 +329,41 @@ def test_multi_images_models(hf_runner, vllm_runner, image_assets, model, ...@@ -331,6 +329,41 @@ def test_multi_images_models(hf_runner, vllm_runner, image_assets, model,
) )
@pytest.mark.parametrize("model", ["OpenGVLab/InternVL2-2B"])
@pytest.mark.parametrize("size_factors", [[0.5, 1.0]])
@pytest.mark.parametrize("dtype", [target_dtype])
@pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5])
@torch.inference_mode()
def test_different_num_patches(hf_runner, vllm_runner, image_assets, model,
                               size_factors, dtype: str, max_tokens: int,
                               num_logprobs: int) -> None:
    """Call ``run_test`` on inputs whose images are rescaled differently.

    Every image asset is first resized to 896x896 and then rescaled by each
    entry of ``size_factors``. Two input sets are built from the results and
    passed to ``run_test`` one after the other:

    * a batching set with one (prompts, images) pair per image asset, where
      the asset's prompt is repeated once per size factor;
    * a multi-image set with a single (prompts, image lists) pair, where each
      prompt is ``HF_MULTIIMAGE_IMAGE_PROMPT`` and each image list holds all
      assets rescaled by the same factor.

    NOTE(review): judging by the test name, the differing sizes are meant to
    yield different patch counts per image -- confirm against the model's
    input processor.
    """
    base_images = [
        asset.pil_image.resize((896, 896)) for asset in image_assets
    ]

    # One (prompts, images) pair per asset; the prompt is duplicated so that
    # it lines up with each rescaled copy of that asset's image.
    batched_cases = []
    for image, prompt in zip(base_images, HF_IMAGE_PROMPTS):
        repeated_prompts = [prompt] * len(size_factors)
        rescaled = [
            rescale_image_size(image, factor) for factor in size_factors
        ]
        batched_cases.append((repeated_prompts, rescaled))

    # A single (prompts, image lists) pair; each inner list contains every
    # asset rescaled by one factor.
    multi_image_prompts = [HF_MULTIIMAGE_IMAGE_PROMPT] * len(size_factors)
    multi_image_groups = []
    for factor in size_factors:
        multi_image_groups.append(
            [rescale_image_size(image, factor) for image in base_images])
    multi_image_cases = [(multi_image_prompts, multi_image_groups)]

    for case_inputs in (batched_cases, multi_image_cases):
        run_test(
            hf_runner,
            vllm_runner,
            case_inputs,
            model,
            dtype=dtype,
            max_tokens=max_tokens,
            num_logprobs=num_logprobs,
            mm_limit=2,
            tensor_parallel_size=1,
        )
@pytest.mark.parametrize( @pytest.mark.parametrize(
"models", [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")]) "models", [("OpenGVLab/InternVL2-2B", "OpenGVLab/InternVL2-2B-AWQ")])
@pytest.mark.parametrize( @pytest.mark.parametrize(
......
...@@ -8,11 +8,9 @@ from vllm.multimodal.utils import rescale_image_size ...@@ -8,11 +8,9 @@ from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs from vllm.sequence import SampleLogprobs
from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE
from ..conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner, from ....conftest import (IMAGE_ASSETS, HfRunner, PromptImageInput, VllmRunner,
_ImageAssets) _ImageAssets)
from .utils import check_logprobs_close from ...utils import check_logprobs_close
pytestmark = pytest.mark.vlm
_LIMIT_IMAGE_PER_PROMPT = 4 _LIMIT_IMAGE_PER_PROMPT = 4
...@@ -143,7 +141,7 @@ def _run_test( ...@@ -143,7 +141,7 @@ def _run_test(
): ):
"""Inference result should be the same between hf and vllm. """Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images. All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input. For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects For vllm runner, we provide MultiModalDataDict objects
and corresponding MultiModalConfig as input. and corresponding MultiModalConfig as input.
...@@ -239,7 +237,7 @@ def _run_test( ...@@ -239,7 +237,7 @@ def _run_test(
@pytest.mark.parametrize("max_tokens", [128]) @pytest.mark.parametrize("max_tokens", [128])
@pytest.mark.parametrize("num_logprobs", [5]) @pytest.mark.parametrize("num_logprobs", [5])
def test_models(hf_runner, vllm_runner, image_assets, model, size_factors, def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
dtype: str, max_tokens: int, num_logprobs: int) -> None: dtype, max_tokens, num_logprobs) -> None:
run_test( run_test(
hf_runner, hf_runner,
vllm_runner, vllm_runner,
......
...@@ -5,10 +5,8 @@ from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer ...@@ -5,10 +5,8 @@ from transformers import AutoConfig, AutoModelForVision2Seq, AutoTokenizer
from vllm.sequence import SampleLogprobs from vllm.sequence import SampleLogprobs
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets from ....conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
from .utils import check_logprobs_close from ...utils import check_logprobs_close
pytestmark = pytest.mark.vlm
HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({ HF_IMAGE_PROMPTS = IMAGE_ASSETS.prompts({
"stop_sign": "stop_sign":
...@@ -62,7 +60,7 @@ def run_test( ...@@ -62,7 +60,7 @@ def run_test(
): ):
"""Inference result should be the same between hf and vllm. """Inference result should be the same between hf and vllm.
All the image fixtures for the test is under tests/images. All the image fixtures for the test are from IMAGE_ASSETS.
For huggingface runner, we provide the PIL images as input. For huggingface runner, we provide the PIL images as input.
For vllm runner, we provide MultiModalDataDict objects For vllm runner, we provide MultiModalDataDict objects
and corresponding vision language config as input. and corresponding vision language config as input.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment