Commit 66b809cc authored by zhuwenwen's avatar zhuwenwen
Browse files

Merge tag 'v0.7.2' into v0.7.2-dev

parents 37b63c24 0408efc6
# SPDX-License-Identifier: Apache-2.0
"""Tests for phi3v's multimodal preprocessing kwargs."""
import pytest
......@@ -37,7 +38,10 @@ def test_processor_override(
trust_remote_code=True,
limit_mm_per_prompt={"image": num_imgs},
)
tokenizer = cached_get_tokenizer(ctx.model_config.tokenizer)
tokenizer = cached_get_tokenizer(
ctx.model_config.tokenizer,
trust_remote_code=ctx.model_config.trust_remote_code,
)
processor = MULTIMODAL_REGISTRY.create_processor(
ctx.model_config,
tokenizer=tokenizer,
......
# SPDX-License-Identifier: Apache-2.0
import pytest
from vllm.multimodal import MULTIMODAL_REGISTRY
......@@ -31,7 +33,10 @@ def test_processor_override(
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs},
)
tokenizer = cached_get_tokenizer(ctx.model_config.tokenizer)
tokenizer = cached_get_tokenizer(
ctx.model_config.tokenizer,
trust_remote_code=ctx.model_config.trust_remote_code,
)
processor = MULTIMODAL_REGISTRY.create_processor(
ctx.model_config,
tokenizer=tokenizer,
......
# SPDX-License-Identifier: Apache-2.0
from dataclasses import dataclass, field
from typing import AbstractSet, Any, Literal, Mapping, Optional
......@@ -222,8 +224,7 @@ _CROSS_ENCODER_EXAMPLE_MODELS = {
_MULTIMODAL_EXAMPLE_MODELS = {
# [Decoder-only]
"AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria",
min_transformers_version="4.48"),
"AriaForConditionalGeneration": _HfExamplesInfo("rhymes-ai/Aria"),
"Blip2ForConditionalGeneration": _HfExamplesInfo("Salesforce/blip2-opt-2.7b"), # noqa: E501
"ChameleonForConditionalGeneration": _HfExamplesInfo("facebook/chameleon-7b"), # noqa: E501
"ChatGLMModel": _HfExamplesInfo("THUDM/glm-4v-9b",
......@@ -263,6 +264,8 @@ _MULTIMODAL_EXAMPLE_MODELS = {
trust_remote_code=True),
"Qwen2AudioForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-Audio-7B-Instruct"), # noqa: E501
"Qwen2VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2-VL-2B-Instruct"), # noqa: E501
"Qwen2_5_VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-VL-3B-Instruct", # noqa: E501
min_transformers_version="4.49"), # noqa: E501
"UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_3",
trust_remote_code=True),
# [Encoder-decoder]
......@@ -276,7 +279,11 @@ _SPECULATIVE_DECODING_EXAMPLE_MODELS = {
"MedusaModel": _HfExamplesInfo("JackFram/llama-68m",
speculative_model="abhigoyal/vllm-medusa-llama-68m-random"), # noqa: E501
"MLPSpeculatorPreTrainedModel": _HfExamplesInfo("JackFram/llama-160m",
speculative_model="ibm-fms/llama-160m-accelerator"), # noqa: E501
speculative_model="ibm-ai-platform/llama-160m-accelerator"), # noqa: E501
}
# Example checkpoint registered under the "TransformersModel" architecture
# name; this mapping is merged into _EXAMPLE_MODELS alongside the other
# example-model tables.
# NOTE(review): trust_remote_code=True is presumably required because the
# checkpoint ships custom modeling code -- confirm against the model repo.
_FALLBACK_MODEL = {
"TransformersModel": _HfExamplesInfo("ArthurZ/Ilama-3.2-1B", trust_remote_code=True), # noqa: E501
}
_EXAMPLE_MODELS = {
......@@ -285,6 +292,7 @@ _EXAMPLE_MODELS = {
**_CROSS_ENCODER_EXAMPLE_MODELS,
**_MULTIMODAL_EXAMPLE_MODELS,
**_SPECULATIVE_DECODING_EXAMPLE_MODELS,
**_FALLBACK_MODEL,
}
......
# SPDX-License-Identifier: Apache-2.0
from unittest.mock import patch
import pytest
......
# SPDX-License-Identifier: Apache-2.0
import os
import pytest
......@@ -13,7 +15,9 @@ def test_plugin(dummy_opt_path):
os.environ["VLLM_PLUGINS"] = ""
with pytest.raises(Exception) as excinfo:
LLM(model=dummy_opt_path, load_format="dummy")
assert "are not supported for now" in str(excinfo.value)
error_msg = "has no vLLM implementation and " \
"the Transformers implementation is not compatible with vLLM."
assert (error_msg in str(excinfo.value))
@fork_new_process_for_each_test
......
# SPDX-License-Identifier: Apache-2.0
import warnings
import pytest
......
# SPDX-License-Identifier: Apache-2.0
"""Test the functionality of the Transformers backend.
Run `pytest tests/models/test_transformers.py`.
"""
from contextlib import nullcontext
from typing import Type
import pytest
from ..conftest import HfRunner, VllmRunner
from ..utils import multi_gpu_test
from .utils import check_logprobs_close
def check_implementation(
    hf_runner: Type[HfRunner],
    vllm_runner: Type[VllmRunner],
    example_prompts: list[str],
    model: str,
    **kwargs,
):
    """Check that vLLM and HF produce close greedy logprobs for ``model``.

    The model is first run through ``vllm_runner`` (``kwargs`` are forwarded
    to the runner), then through ``hf_runner``; both sets of outputs are
    handed to ``check_logprobs_close`` for comparison.

    Args:
        hf_runner: Context-manager factory for the HF reference run.
        vllm_runner: Context-manager factory for the vLLM run.
        example_prompts: Prompts to generate from.
        model: Model name passed to both runners.
        **kwargs: Extra keyword arguments for ``vllm_runner`` only.
    """
    token_budget = 32
    logprob_count = 5

    # The vLLM run happens first and its runner is closed before the HF
    # runner is opened, so the two models are never alive simultaneously.
    with vllm_runner(model, **kwargs) as vllm_model:
        from_vllm = vllm_model.generate_greedy_logprobs(
            example_prompts, token_budget, logprob_count)

    with hf_runner(model) as hf_model:
        from_hf = hf_model.generate_greedy_logprobs_limit(
            example_prompts, token_budget, logprob_count)

    check_logprobs_close(
        name_0="hf",
        outputs_0_lst=from_hf,
        name_1="vllm",
        outputs_1_lst=from_vllm,
    )
@pytest.mark.parametrize(
    "model,model_impl",
    [
        ("meta-llama/Llama-3.2-1B-Instruct", "transformers"),
        ("openai-community/gpt2", "transformers"),
        ("ArthurZ/Ilama-3.2-1B", "auto"),  # CUSTOM CODE
        ("meta-llama/Llama-3.2-1B-Instruct", "auto"),
    ])  # trust_remote_code=True by default
def test_models(hf_runner, vllm_runner, example_prompts, model,
                model_impl) -> None:
    """Run the HF-vs-vLLM logprob comparison for each (model, impl) pair.

    The GPT-2 / "transformers" combination is expected to fail with a
    ``ValueError`` because that model is not backend compatible; every
    other combination must pass the comparison.
    """
    # Model is not backend compatible
    incompatible = (model == "openai-community/gpt2"
                    and model_impl == "transformers")

    expectation = (pytest.raises(
        ValueError,
        match="The Transformers implementation.*not compatible with vLLM")
                   if incompatible else nullcontext())

    with expectation:
        check_implementation(
            hf_runner,
            vllm_runner,
            example_prompts,
            model,
            model_impl=model_impl,
        )
@multi_gpu_test(num_gpus=2)
def test_distributed(
    hf_runner,
    vllm_runner,
    example_prompts,
):
    """Run the logprob comparison with the Transformers implementation
    and ``tensor_parallel_size=2``."""
    check_implementation(
        hf_runner,
        vllm_runner,
        example_prompts,
        "meta-llama/Llama-3.2-1B-Instruct",
        model_impl="transformers",
        tensor_parallel_size=2,
    )
# SPDX-License-Identifier: Apache-2.0
import warnings
from typing import Dict, List, Optional, Sequence, Tuple, Union
......
# SPDX-License-Identifier: Apache-2.0
"""Test that aborting is handled properly."""
import asyncio
......
# SPDX-License-Identifier: Apache-2.0
"""Test that various errors are handled properly."""
import asyncio
......
# SPDX-License-Identifier: Apache-2.0
"""Test that the MQLLMEngine is able to handle 10k concurrent requests."""
import asyncio
......
# SPDX-License-Identifier: Apache-2.0
import asyncio
import multiprocessing
from typing import Callable, Tuple, Union
......
# SPDX-License-Identifier: Apache-2.0
# Test the AsyncLLMEngine with multi-step-decoding
from typing import List, Optional
......
# SPDX-License-Identifier: Apache-2.0
# Test the LLMEngine with multi-step-decoding
import copy
......
# SPDX-License-Identifier: Apache-2.0
import torch
from vllm.multimodal.inputs import MultiModalKwargs, NestedTensors
......
# SPDX-License-Identifier: Apache-2.0
from contextlib import nullcontext
from types import MethodType
from typing import cast
from unittest.mock import MagicMock
import numpy as np
import pytest
from transformers import ProcessorMixin
from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
......@@ -634,3 +638,70 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
mm_data=mm_data,
hf_processor_mm_kwargs={},
)
class _ProcessorProxy:
    """Test double that wraps a HF processor.

    Attribute lookups that fail on the proxy itself are forwarded to the
    wrapped processor. Calling the proxy does no processing: it ignores
    ``text``/``images``/``videos``/``return_tensors`` and simply echoes the
    ``exists`` keyword back, so a test can observe which keyword arguments
    reached the call.
    """

    def __init__(self, processor: ProcessorMixin) -> None:
        super().__init__()

        self.__processor = processor

    def __getattr__(self, key: str):
        # __getattr__ only runs after normal lookup has failed. If the
        # wrapped processor itself is the missing attribute (the instance was
        # created without __init__, e.g. by copy.copy), delegating would
        # re-enter this method until RecursionError. Raise AttributeError
        # instead so hasattr()/copy behave normally.
        if key == "_ProcessorProxy__processor":
            raise AttributeError(key)

        return getattr(self.__processor, key)

    def __call__(
        self,
        text=None,
        images=None,
        videos=None,
        exists=None,
        return_tensors=None,
    ):
        """Return ``{"exists": exists}``; all other arguments are ignored."""
        return dict(exists=exists)
@pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-7B-Instruct"])  # Dummy
# yapf: disable
@pytest.mark.parametrize(
    ("call_kwargs", "expected_kwargs"),
    [
        # Should ignore invalid kwargs
        ({"does_not_exist": 100}, {"exists": None}),
        ({"exists": 1}, {"exists": 1}),
        ({"does_not_exist": 100, "exists": 1}, {"exists": 1}),
    ],
)
# yapf: enable
def test_hf_processor_kwargs(model_id, call_kwargs, expected_kwargs):
    """Check which ``mm_kwargs`` reach the HF processor call.

    The processor's ``get_hf_processor`` is replaced with a stub that
    asserts it receives ``call_kwargs`` unchanged and returns a
    ``_ProcessorProxy``. The proxy's ``__call__`` only accepts ``exists``
    (besides the standard inputs) and echoes it back, so the result of
    ``_call_hf_processor`` shows that unknown keys such as
    ``does_not_exist`` are dropped while ``exists`` is forwarded.
    """
    model_config = ModelConfig(
        model=model_id,
        task="auto",
        tokenizer=model_id,
        tokenizer_mode="auto",
        trust_remote_code=False,
        seed=0,
        dtype="half",
        revision=None,
    )

    # Pass trust_remote_code from the model config so the tokenizer is
    # loaded with the same setting as the model.
    tokenizer = cached_get_tokenizer(
        model_config.tokenizer,
        trust_remote_code=model_config.trust_remote_code,
    )
    processor = MULTIMODAL_REGISTRY.create_processor(
        model_config,
        tokenizer=tokenizer,
    )
    orig_get_hf_processor = processor.info.get_hf_processor

    def get_hf_processor(self, **kwargs):
        # All caller-supplied kwargs must arrive here unfiltered.
        assert kwargs == call_kwargs
        return _ProcessorProxy(orig_get_hf_processor())

    # Bind the stub as a method so it receives ``processor.info`` as self.
    processor.info.get_hf_processor = MethodType(get_hf_processor,
                                                 processor.info)

    out_kwargs = processor._call_hf_processor(
        prompt="",
        mm_data={},
        mm_kwargs=call_kwargs,
    )

    assert out_kwargs == expected_kwargs
# SPDX-License-Identifier: Apache-2.0
import base64
import mimetypes
import os
......
# SPDX-License-Identifier: Apache-2.0
import numpy as np
from PIL import Image
......
# SPDX-License-Identifier: Apache-2.0
import random
from typing import Optional
......
# SPDX-License-Identifier: Apache-2.0
from setuptools import setup
setup(name='vllm_add_dummy_model',
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment