Unverified commit 4e256cad authored by Harry Mellor, committed by GitHub
Browse files

Remove all references to `yapf` as it's no longer used (#26251)


Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent d6953beb
...@@ -12,9 +12,6 @@ from functools import reduce ...@@ -12,9 +12,6 @@ from functools import reduce
from typing import Optional, Union from typing import Optional, Union
import jinja2 import jinja2
# yapf conflicts with isort for this block
# yapf: disable
from vllm_cutlass_library_extension import ( from vllm_cutlass_library_extension import (
DataType, DataType,
EpilogueScheduleTag, EpilogueScheduleTag,
...@@ -31,8 +28,6 @@ from vllm_cutlass_library_extension import ( ...@@ -31,8 +28,6 @@ from vllm_cutlass_library_extension import (
VLLMKernelScheduleTag, VLLMKernelScheduleTag,
) )
# yapf: enable
# #
# Generator templating # Generator templating
# #
......
...@@ -21,8 +21,6 @@ from vllm.utils import FlexibleArgumentParser ...@@ -21,8 +21,6 @@ from vllm.utils import FlexibleArgumentParser
logger = logging.getLogger() logger = logging.getLogger()
# yapf conflicts with isort for this docstring
# yapf: disable
""" """
tensorize_vllm_model.py is a script that can be used to serialize and tensorize_vllm_model.py is a script that can be used to serialize and
deserialize vLLM models. These models can be loaded using tensorizer deserialize vLLM models. These models can be loaded using tensorizer
...@@ -132,7 +130,8 @@ def get_parser(): ...@@ -132,7 +130,8 @@ def get_parser():
"can be loaded using tensorizer directly to the GPU " "can be loaded using tensorizer directly to the GPU "
"extremely quickly. Tensor encryption and decryption is " "extremely quickly. Tensor encryption and decryption is "
"also supported, although libsodium must be installed to " "also supported, although libsodium must be installed to "
"use it.") "use it."
)
parser = EngineArgs.add_cli_args(parser) parser = EngineArgs.add_cli_args(parser)
parser.add_argument( parser.add_argument(
...@@ -144,13 +143,14 @@ def get_parser(): ...@@ -144,13 +143,14 @@ def get_parser():
"along with the model by instantiating a TensorizerConfig object, " "along with the model by instantiating a TensorizerConfig object, "
"creating a dict from it with TensorizerConfig.to_serializable(), " "creating a dict from it with TensorizerConfig.to_serializable(), "
"and passing it to LoRARequest's initializer with the kwarg " "and passing it to LoRARequest's initializer with the kwarg "
"tensorizer_config_dict." "tensorizer_config_dict.",
) )
subparsers = parser.add_subparsers(dest='command', required=True) subparsers = parser.add_subparsers(dest="command", required=True)
serialize_parser = subparsers.add_parser( serialize_parser = subparsers.add_parser(
'serialize', help="Serialize a model to `--serialized-directory`") "serialize", help="Serialize a model to `--serialized-directory`"
)
serialize_parser.add_argument( serialize_parser.add_argument(
"--suffix", "--suffix",
...@@ -163,7 +163,9 @@ def get_parser(): ...@@ -163,7 +163,9 @@ def get_parser():
"`--suffix` is `v1`, the serialized model tensors will be " "`--suffix` is `v1`, the serialized model tensors will be "
"saved to " "saved to "
"`s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors`. " "`s3://my-bucket/vllm/EleutherAI/gpt-j-6B/v1/model.tensors`. "
"If none is provided, a random UUID will be used.")) "If none is provided, a random UUID will be used."
),
)
serialize_parser.add_argument( serialize_parser.add_argument(
"--serialized-directory", "--serialized-directory",
type=str, type=str,
...@@ -175,33 +177,44 @@ def get_parser(): ...@@ -175,33 +177,44 @@ def get_parser():
"and the model HuggingFace ID is `EleutherAI/gpt-j-6B`, tensors will " "and the model HuggingFace ID is `EleutherAI/gpt-j-6B`, tensors will "
"be saved to `dir/vllm/EleutherAI/gpt-j-6B/suffix/model.tensors`, " "be saved to `dir/vllm/EleutherAI/gpt-j-6B/suffix/model.tensors`, "
"where `suffix` is given by `--suffix` or a random UUID if not " "where `suffix` is given by `--suffix` or a random UUID if not "
"provided.") "provided.",
)
serialize_parser.add_argument( serialize_parser.add_argument(
"--serialization-kwargs", "--serialization-kwargs",
type=tensorizer_kwargs_arg, type=tensorizer_kwargs_arg,
required=False, required=False,
help=("A JSON string containing additional keyword arguments to " help=(
"A JSON string containing additional keyword arguments to "
"pass to Tensorizer's TensorSerializer during " "pass to Tensorizer's TensorSerializer during "
"serialization.")) "serialization."
),
)
serialize_parser.add_argument( serialize_parser.add_argument(
"--keyfile", "--keyfile",
type=str, type=str,
required=False, required=False,
help=("Encrypt the model weights with a randomly-generated binary key," help=(
" and save the key at this path")) "Encrypt the model weights with a randomly-generated binary key,"
" and save the key at this path"
),
)
deserialize_parser = subparsers.add_parser( deserialize_parser = subparsers.add_parser(
'deserialize', "deserialize",
help=("Deserialize a model from `--path-to-tensors`" help=(
" to verify it can be loaded and used.")) "Deserialize a model from `--path-to-tensors`"
" to verify it can be loaded and used."
),
)
deserialize_parser.add_argument( deserialize_parser.add_argument(
"--path-to-tensors", "--path-to-tensors",
type=str, type=str,
required=False, required=False,
help="The local path or S3 URI to the model tensors to deserialize. ") help="The local path or S3 URI to the model tensors to deserialize. ",
)
deserialize_parser.add_argument( deserialize_parser.add_argument(
"--serialized-directory", "--serialized-directory",
...@@ -209,74 +222,82 @@ def get_parser(): ...@@ -209,74 +222,82 @@ def get_parser():
required=False, required=False,
help="Directory with model artifacts for loading. Assumes a " help="Directory with model artifacts for loading. Assumes a "
"model.tensors file exists therein. Can supersede " "model.tensors file exists therein. Can supersede "
"--path-to-tensors.") "--path-to-tensors.",
)
deserialize_parser.add_argument( deserialize_parser.add_argument(
"--keyfile", "--keyfile",
type=str, type=str,
required=False, required=False,
help=("Path to a binary key to use to decrypt the model weights," help=(
" if the model was serialized with encryption")) "Path to a binary key to use to decrypt the model weights,"
" if the model was serialized with encryption"
),
)
deserialize_parser.add_argument( deserialize_parser.add_argument(
"--deserialization-kwargs", "--deserialization-kwargs",
type=tensorizer_kwargs_arg, type=tensorizer_kwargs_arg,
required=False, required=False,
help=("A JSON string containing additional keyword arguments to " help=(
"A JSON string containing additional keyword arguments to "
"pass to Tensorizer's `TensorDeserializer` during " "pass to Tensorizer's `TensorDeserializer` during "
"deserialization.")) "deserialization."
),
)
TensorizerArgs.add_cli_args(deserialize_parser) TensorizerArgs.add_cli_args(deserialize_parser)
return parser return parser
def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
cfg: TensorizerConfig): def merge_extra_config_with_tensorizer_config(extra_cfg: dict, cfg: TensorizerConfig):
for k, v in extra_cfg.items(): for k, v in extra_cfg.items():
if hasattr(cfg, k): if hasattr(cfg, k):
setattr(cfg, k, v) setattr(cfg, k, v)
logger.info( logger.info(
"Updating TensorizerConfig with %s from " "Updating TensorizerConfig with %s from "
"--model-loader-extra-config provided", k "--model-loader-extra-config provided",
k,
) )
def deserialize(args, tensorizer_config): def deserialize(args, tensorizer_config):
if args.lora_path: if args.lora_path:
tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
llm = LLM(model=args.model, llm = LLM(
model=args.model,
load_format="tensorizer", load_format="tensorizer",
tensor_parallel_size=args.tensor_parallel_size, tensor_parallel_size=args.tensor_parallel_size,
model_loader_extra_config=tensorizer_config, model_loader_extra_config=tensorizer_config,
enable_lora=True, enable_lora=True,
) )
sampling_params = SamplingParams( sampling_params = SamplingParams(
temperature=0, temperature=0, max_tokens=256, stop=["[/assistant]"]
max_tokens=256,
stop=["[/assistant]"]
) )
# Truncating this as the extra text isn't necessary # Truncating this as the extra text isn't necessary
prompts = [ prompts = ["[user] Write a SQL query to answer the question based on ..."]
"[user] Write a SQL query to answer the question based on ..."
]
# Test LoRA load # Test LoRA load
print( print(
llm.generate( llm.generate(
prompts, prompts,
sampling_params, sampling_params,
lora_request=LoRARequest("sql-lora", lora_request=LoRARequest(
"sql-lora",
1, 1,
args.lora_path, args.lora_path,
tensorizer_config_dict = tensorizer_config tensorizer_config_dict=tensorizer_config.to_serializable(),
.to_serializable()) ),
) )
) )
else: else:
llm = LLM(model=args.model, llm = LLM(
model=args.model,
load_format="tensorizer", load_format="tensorizer",
tensor_parallel_size=args.tensor_parallel_size, tensor_parallel_size=args.tensor_parallel_size,
model_loader_extra_config=tensorizer_config model_loader_extra_config=tensorizer_config,
) )
return llm return llm
...@@ -285,17 +306,20 @@ def main(): ...@@ -285,17 +306,20 @@ def main():
parser = get_parser() parser = get_parser()
args = parser.parse_args() args = parser.parse_args()
s3_access_key_id = (getattr(args, 's3_access_key_id', None) s3_access_key_id = getattr(args, "s3_access_key_id", None) or os.environ.get(
or os.environ.get("S3_ACCESS_KEY_ID", None)) "S3_ACCESS_KEY_ID", None
s3_secret_access_key = (getattr(args, 's3_secret_access_key', None) )
or os.environ.get("S3_SECRET_ACCESS_KEY", None)) s3_secret_access_key = getattr(
s3_endpoint = (getattr(args, 's3_endpoint', None) args, "s3_secret_access_key", None
or os.environ.get("S3_ENDPOINT_URL", None)) ) or os.environ.get("S3_SECRET_ACCESS_KEY", None)
s3_endpoint = getattr(args, "s3_endpoint", None) or os.environ.get(
"S3_ENDPOINT_URL", None
)
credentials = { credentials = {
"s3_access_key_id": s3_access_key_id, "s3_access_key_id": s3_access_key_id,
"s3_secret_access_key": s3_secret_access_key, "s3_secret_access_key": s3_secret_access_key,
"s3_endpoint": s3_endpoint "s3_endpoint": s3_endpoint,
} }
model_ref = args.model model_ref = args.model
...@@ -309,25 +333,25 @@ def main(): ...@@ -309,25 +333,25 @@ def main():
if args.model_loader_extra_config: if args.model_loader_extra_config:
extra_config = json.loads(args.model_loader_extra_config) extra_config = json.loads(args.model_loader_extra_config)
tensorizer_dir = args.serialized_directory or extra_config.get("tensorizer_dir")
tensorizer_dir = (args.serialized_directory or tensorizer_uri = getattr(args, "path_to_tensors", None) or extra_config.get(
extra_config.get("tensorizer_dir")) "tensorizer_uri"
tensorizer_uri = (getattr(args, "path_to_tensors", None) )
or extra_config.get("tensorizer_uri"))
if tensorizer_dir and tensorizer_uri: if tensorizer_dir and tensorizer_uri:
parser.error("--serialized-directory and --path-to-tensors " parser.error(
"cannot both be provided") "--serialized-directory and --path-to-tensors cannot both be provided"
)
if not tensorizer_dir and not tensorizer_uri: if not tensorizer_dir and not tensorizer_uri:
parser.error("Either --serialized-directory or --path-to-tensors " parser.error(
"must be provided") "Either --serialized-directory or --path-to-tensors must be provided"
)
if args.command == "serialize": if args.command == "serialize":
engine_args = EngineArgs.from_cli_args(args) engine_args = EngineArgs.from_cli_args(args)
input_dir = tensorizer_dir.rstrip('/') input_dir = tensorizer_dir.rstrip("/")
suffix = args.suffix if args.suffix else uuid.uuid4().hex suffix = args.suffix if args.suffix else uuid.uuid4().hex
base_path = f"{input_dir}/vllm/{model_ref}/{suffix}" base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
if engine_args.tensor_parallel_size > 1: if engine_args.tensor_parallel_size > 1:
...@@ -339,15 +363,14 @@ def main(): ...@@ -339,15 +363,14 @@ def main():
tensorizer_uri=model_path, tensorizer_uri=model_path,
encryption_keyfile=keyfile, encryption_keyfile=keyfile,
serialization_kwargs=args.serialization_kwargs or {}, serialization_kwargs=args.serialization_kwargs or {},
**credentials **credentials,
) )
if args.lora_path: if args.lora_path:
tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
tensorize_lora_adapter(args.lora_path, tensorizer_config) tensorize_lora_adapter(args.lora_path, tensorizer_config)
merge_extra_config_with_tensorizer_config(extra_config, merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
tensorizer_config)
tensorize_vllm_model(engine_args, tensorizer_config) tensorize_vllm_model(engine_args, tensorizer_config)
elif args.command == "deserialize": elif args.command == "deserialize":
...@@ -356,11 +379,10 @@ def main(): ...@@ -356,11 +379,10 @@ def main():
tensorizer_dir=args.serialized_directory, tensorizer_dir=args.serialized_directory,
encryption_keyfile=keyfile, encryption_keyfile=keyfile,
deserialization_kwargs=args.deserialization_kwargs or {}, deserialization_kwargs=args.deserialization_kwargs or {},
**credentials **credentials,
) )
merge_extra_config_with_tensorizer_config(extra_config, merge_extra_config_with_tensorizer_config(extra_config, tensorizer_config)
tensorizer_config)
deserialize(args, tensorizer_config) deserialize(args, tensorizer_config)
else: else:
raise ValueError("Either serialize or deserialize must be specified.") raise ValueError("Either serialize or deserialize must be specified.")
......
...@@ -8,16 +8,11 @@ import torch ...@@ -8,16 +8,11 @@ import torch
import vllm.envs as envs import vllm.envs as envs
from tests.kernels.quantization.nvfp4_utils import quant_nvfp4_tensor from tests.kernels.quantization.nvfp4_utils import quant_nvfp4_tensor
from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant
# yapf conflicts with isort for this block
# yapf: disable
from vllm.compilation.activation_quant_fusion import ( from vllm.compilation.activation_quant_fusion import (
FUSED_OPS, FUSED_OPS,
SILU_MUL_OP, SILU_MUL_OP,
ActivationQuantFusionPass, ActivationQuantFusionPass,
) )
# yapf: enable
from vllm.compilation.fusion import QUANT_OPS from vllm.compilation.fusion import QUANT_OPS
from vllm.compilation.noop_elimination import NoOpEliminationPass from vllm.compilation.noop_elimination import NoOpEliminationPass
from vllm.compilation.post_cleanup import PostCleanupPass from vllm.compilation.post_cleanup import PostCleanupPass
......
...@@ -107,10 +107,8 @@ class EPTestSettings: ...@@ -107,10 +107,8 @@ class EPTestSettings:
# NOTE: You can adjust tp_base locally to fit the model in GPU # NOTE: You can adjust tp_base locally to fit the model in GPU
# The values displayed here are only a rough indicator of the size of the model # The values displayed here are only a rough indicator of the size of the model
# yapf: disable
TEST_MODELS = { TEST_MODELS = {
"deepseek-ai/DeepSeek-V2-Lite-Chat": EPTestSettings.fast( "deepseek-ai/DeepSeek-V2-Lite-Chat": EPTestSettings.fast(trust_remote_code=True),
trust_remote_code=True),
"mistralai/Mixtral-8x7B-Instruct-v0.1": EPTestSettings.fast(tp_base=4), "mistralai/Mixtral-8x7B-Instruct-v0.1": EPTestSettings.fast(tp_base=4),
} }
...@@ -192,22 +190,24 @@ def _compare_tp( ...@@ -192,22 +190,24 @@ def _compare_tp(
] ]
try: try:
compare_two_settings(model_name, compare_two_settings(
model_name,
ep_args, ep_args,
tp_args, tp_args,
ep_env, ep_env,
tp_env, tp_env,
method=method, method=method,
max_wait_seconds=360) max_wait_seconds=360,
)
except Exception: except Exception:
raise raise
@pytest.mark.parametrize( @pytest.mark.parametrize(
("model_name", "parallel_setup", "distributed_backend", "runner", ("model_name", "parallel_setup", "distributed_backend", "runner", "test_options"),
"test_options"),
[ [
params for model_name, settings in TEST_MODELS.items() params
for model_name, settings in TEST_MODELS.items()
for params in settings.iter_params(model_name) for params in settings.iter_params(model_name)
], ],
) )
...@@ -220,10 +220,12 @@ def test_ep( ...@@ -220,10 +220,12 @@ def test_ep(
test_options: EPTestOptions, test_options: EPTestOptions,
num_gpus_available, num_gpus_available,
): ):
_compare_tp(model_name, _compare_tp(
model_name,
parallel_setup, parallel_setup,
distributed_backend, distributed_backend,
runner, runner,
test_options, test_options,
num_gpus_available, num_gpus_available,
method="generate") method="generate",
)
...@@ -100,7 +100,6 @@ class PPTestSettings: ...@@ -100,7 +100,6 @@ class PPTestSettings:
# NOTE: You can adjust tp_base and/or pp_base locally to fit the model in GPU # NOTE: You can adjust tp_base and/or pp_base locally to fit the model in GPU
# The values displayed here are only a rough indicator of the size of the model # The values displayed here are only a rough indicator of the size of the model
# yapf: disable
TEXT_GENERATION_MODELS = { TEXT_GENERATION_MODELS = {
# [Decoder-only] # [Decoder-only]
# Uses Llama # Uses Llama
...@@ -150,7 +149,9 @@ TEXT_GENERATION_MODELS = { ...@@ -150,7 +149,9 @@ TEXT_GENERATION_MODELS = {
"adept/persimmon-8b-chat": PPTestSettings.fast(), "adept/persimmon-8b-chat": PPTestSettings.fast(),
"microsoft/phi-2": PPTestSettings.fast(), "microsoft/phi-2": PPTestSettings.fast(),
"microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(), "microsoft/Phi-3-small-8k-instruct": PPTestSettings.fast(),
"microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(multi_node_only=True, load_format="dummy"), # noqa: E501 "microsoft/Phi-3.5-MoE-instruct": PPTestSettings.detailed(
multi_node_only=True, load_format="dummy"
), # noqa: E501
"Qwen/Qwen-7B-Chat": PPTestSettings.fast(), "Qwen/Qwen-7B-Chat": PPTestSettings.fast(),
"Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(), "Qwen/Qwen2.5-0.5B-Instruct": PPTestSettings.fast(),
"Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(), "Qwen/Qwen1.5-MoE-A2.7B-Chat": PPTestSettings.fast(),
...@@ -196,7 +197,6 @@ MULTIMODAL_MODELS = { ...@@ -196,7 +197,6 @@ MULTIMODAL_MODELS = {
"Qwen/Qwen2-VL-2B-Instruct": PPTestSettings.fast(), "Qwen/Qwen2-VL-2B-Instruct": PPTestSettings.fast(),
"fixie-ai/ultravox-v0_5-llama-3_2-1b": PPTestSettings.fast(), "fixie-ai/ultravox-v0_5-llama-3_2-1b": PPTestSettings.fast(),
} }
# yapf: enable
# NOTE: You can update this on your local machine to run specific tests # NOTE: You can update this on your local machine to run specific tests
TEST_MODELS = [ TEST_MODELS = [
......
...@@ -287,29 +287,15 @@ def test_prefix_cache_default(): ...@@ -287,29 +287,15 @@ def test_prefix_cache_default():
assert not engine_args.enable_prefix_caching assert not engine_args.enable_prefix_caching
# yapf: disable @pytest.mark.parametrize(
@pytest.mark.parametrize(("arg", "expected", "option"), [ ("arg", "expected", "option"),
[
(None, None, "mm-processor-kwargs"), (None, None, "mm-processor-kwargs"),
("{}", {}, "mm-processor-kwargs"), ("{}", {}, "mm-processor-kwargs"),
( ('{"num_crops": 4}', {"num_crops": 4}, "mm-processor-kwargs"),
'{"num_crops": 4}', ('{"foo": {"bar": "baz"}}', {"foo": {"bar": "baz"}}, "mm-processor-kwargs"),
{ ],
"num_crops": 4 )
},
"mm-processor-kwargs"
),
(
'{"foo": {"bar": "baz"}}',
{
"foo":
{
"bar": "baz"
}
},
"mm-processor-kwargs"
),
])
# yapf: enable
def test_composite_arg_parser(arg, expected, option): def test_composite_arg_parser(arg, expected, option):
parser = EngineArgs.add_cli_args(FlexibleArgumentParser()) parser = EngineArgs.add_cli_args(FlexibleArgumentParser())
if arg is None: if arg is None:
...@@ -321,8 +307,7 @@ def test_composite_arg_parser(arg, expected, option): ...@@ -321,8 +307,7 @@ def test_composite_arg_parser(arg, expected, option):
def test_human_readable_model_len(): def test_human_readable_model_len():
# `exit_on_error` disabled to test invalid values below # `exit_on_error` disabled to test invalid values below
parser = EngineArgs.add_cli_args( parser = EngineArgs.add_cli_args(FlexibleArgumentParser(exit_on_error=False))
FlexibleArgumentParser(exit_on_error=False))
args = parser.parse_args([]) args = parser.parse_args([])
assert args.max_model_len is None assert args.max_model_len is None
......
...@@ -15,6 +15,7 @@ from vllm.assets.video import VideoAsset ...@@ -15,6 +15,7 @@ from vllm.assets.video import VideoAsset
from vllm.config import ModelConfig from vllm.config import ModelConfig
from vllm.entrypoints.chat_utils import ( from vllm.entrypoints.chat_utils import (
_try_extract_ast, _try_extract_ast,
apply_mistral_chat_template,
load_chat_template, load_chat_template,
parse_chat_messages, parse_chat_messages,
parse_chat_messages_futures, parse_chat_messages_futures,
...@@ -1855,17 +1856,17 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa ...@@ -1855,17 +1856,17 @@ def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, expected_kwa
# NOTE: Qwen2-Audio default chat template is specially defined inside # NOTE: Qwen2-Audio default chat template is specially defined inside
# processor class instead of using `tokenizer_config.json` # processor class instead of using `tokenizer_config.json`
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("model", "expected_format"), ("model", "expected_format"),
[(PHI3V_MODEL_ID, "string"), [
(PHI3V_MODEL_ID, "string"),
(QWEN2VL_MODEL_ID, "openai"), (QWEN2VL_MODEL_ID, "openai"),
(QWEN25VL_MODEL_ID, "openai"), (QWEN25VL_MODEL_ID, "openai"),
(ULTRAVOX_MODEL_ID, "string"), (ULTRAVOX_MODEL_ID, "string"),
(QWEN2AUDIO_MODEL_ID, "openai"), (QWEN2AUDIO_MODEL_ID, "openai"),
(LLAMA_GUARD_MODEL_ID, "openai")], (LLAMA_GUARD_MODEL_ID, "openai"),
],
) )
# yapf: enable
def test_resolve_content_format_hf_defined(model, expected_format): def test_resolve_content_format_hf_defined(model, expected_format):
model_info = HF_EXAMPLE_MODELS.find_hf_info(model) model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
model_info.check_available_online(on_fail="skip") model_info.check_available_online(on_fail="skip")
...@@ -1879,7 +1880,8 @@ def test_resolve_content_format_hf_defined(model, expected_format): ...@@ -1879,7 +1880,8 @@ def test_resolve_content_format_hf_defined(model, expected_format):
hf_overrides=model_info.hf_overrides, hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.skip_tokenizer_init, skip_tokenizer_init=model_info.skip_tokenizer_init,
enforce_eager=model_info.enforce_eager, enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype) dtype=model_info.dtype,
)
tokenizer = get_tokenizer( tokenizer = get_tokenizer(
model, model,
...@@ -1911,18 +1913,18 @@ def test_resolve_content_format_hf_defined(model, expected_format): ...@@ -1911,18 +1913,18 @@ def test_resolve_content_format_hf_defined(model, expected_format):
assert resolved_format == expected_format assert resolved_format == expected_format
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("model", "expected_format"), ("model", "expected_format"),
[("Salesforce/blip2-opt-2.7b", "string"), [
("Salesforce/blip2-opt-2.7b", "string"),
("facebook/chameleon-7b", "string"), ("facebook/chameleon-7b", "string"),
("deepseek-ai/deepseek-vl2-tiny", "string"), ("deepseek-ai/deepseek-vl2-tiny", "string"),
("adept/fuyu-8b", "string"), ("adept/fuyu-8b", "string"),
("google/paligemma-3b-mix-224", "string"), ("google/paligemma-3b-mix-224", "string"),
("Qwen/Qwen-VL", "string"), ("Qwen/Qwen-VL", "string"),
("Qwen/Qwen-VL-Chat", "string")], ("Qwen/Qwen-VL-Chat", "string"),
],
) )
# yapf: enable
def test_resolve_content_format_fallbacks(model, expected_format): def test_resolve_content_format_fallbacks(model, expected_format):
model_info = HF_EXAMPLE_MODELS.find_hf_info(model) model_info = HF_EXAMPLE_MODELS.find_hf_info(model)
model_info.check_available_online(on_fail="skip") model_info.check_available_online(on_fail="skip")
...@@ -1936,7 +1938,8 @@ def test_resolve_content_format_fallbacks(model, expected_format): ...@@ -1936,7 +1938,8 @@ def test_resolve_content_format_fallbacks(model, expected_format):
hf_overrides=model_info.hf_overrides, hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.skip_tokenizer_init, skip_tokenizer_init=model_info.skip_tokenizer_init,
enforce_eager=model_info.enforce_eager, enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype) dtype=model_info.dtype,
)
tokenizer = get_tokenizer( tokenizer = get_tokenizer(
model_config.tokenizer, model_config.tokenizer,
...@@ -1968,10 +1971,10 @@ def test_resolve_content_format_fallbacks(model, expected_format): ...@@ -1968,10 +1971,10 @@ def test_resolve_content_format_fallbacks(model, expected_format):
assert resolved_format == expected_format assert resolved_format == expected_format
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("template_path", "expected_format"), ("template_path", "expected_format"),
[("template_alpaca.jinja", "string"), [
("template_alpaca.jinja", "string"),
("template_baichuan.jinja", "string"), ("template_baichuan.jinja", "string"),
("template_chatglm.jinja", "string"), ("template_chatglm.jinja", "string"),
("template_chatglm2.jinja", "string"), ("template_chatglm2.jinja", "string"),
...@@ -1989,9 +1992,9 @@ def test_resolve_content_format_fallbacks(model, expected_format): ...@@ -1989,9 +1992,9 @@ def test_resolve_content_format_fallbacks(model, expected_format):
("tool_chat_template_llama3.1_json.jinja", "openai"), ("tool_chat_template_llama3.1_json.jinja", "openai"),
("tool_chat_template_llama3.2_json.jinja", "openai"), ("tool_chat_template_llama3.2_json.jinja", "openai"),
("tool_chat_template_mistral_parallel.jinja", "string"), ("tool_chat_template_mistral_parallel.jinja", "string"),
("tool_chat_template_mistral.jinja", "string")], ("tool_chat_template_mistral.jinja", "string"),
],
) )
# yapf: enable
def test_resolve_content_format_examples(template_path, expected_format): def test_resolve_content_format_examples(template_path, expected_format):
model_config = ModelConfig( model_config = ModelConfig(
PHI3V_MODEL_ID, # Dummy PHI3V_MODEL_ID, # Dummy
...@@ -2024,40 +2027,34 @@ def test_resolve_content_format_examples(template_path, expected_format): ...@@ -2024,40 +2027,34 @@ def test_resolve_content_format_examples(template_path, expected_format):
assert resolved_format == expected_format assert resolved_format == expected_format
def test_parse_chat_messages_include_thinking_chunk(mistral_model_config, def test_parse_chat_messages_include_thinking_chunk(
mistral_tokenizer): mistral_model_config, mistral_tokenizer
messages = [{ ):
"role": messages = [
"system", {
"content": [{ "role": "system",
"type": "text", "content": [
"text": "You are a helpful assistant." {"type": "text", "text": "You are a helpful assistant."},
}, { {
"type":
"thinking",
"closed":
True,
"thinking":
"Only return the answer when you are confident."
}]
}, {
"role": "user",
"content": "What is 2+2?"
}, {
"role":
"assistant",
"content": [{
"type": "text",
"text": "Let me think about it."
}, {
"type": "thinking", "type": "thinking",
"closed": True, "closed": True,
"thinking": "2+2 = 4" "thinking": "Only return the answer when you are confident.",
}, { },
],
},
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Let me think about it."},
{"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
{
"type": "text", "type": "text",
"text": "The answer is 4.", "text": "The answer is 4.",
}], },
}] ],
},
]
conversation_with_thinking, _, _ = parse_chat_messages( conversation_with_thinking, _, _ = parse_chat_messages(
messages, messages,
...@@ -2066,122 +2063,105 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config, ...@@ -2066,122 +2063,105 @@ def test_parse_chat_messages_include_thinking_chunk(mistral_model_config,
content_format="openai", content_format="openai",
) )
expected_conversation = [{ expected_conversation = [
"role": {
"system", "role": "system",
"content": [{
"type": "text",
"text": "You are a helpful assistant."
}, {
"type": "text",
"text": "Only return the answer when you are confident."
}],
}, {
"role":
"user",
"content": [{
"type": "text",
"text": "What is 2+2?"
}],
}, {
"role":
"assistant",
"content": [ "content": [
{"type": "text", "text": "You are a helpful assistant."},
{ {
"type": "text", "type": "text",
"text": "Let me think about it." "text": "Only return the answer when you are confident.",
},
],
}, },
{ {
"type": "text", "role": "user",
"text": "2+2 = 4" "content": [{"type": "text", "text": "What is 2+2?"}],
}, },
{ {
"type": "text", "role": "assistant",
"text": "The answer is 4." "content": [
{"type": "text", "text": "Let me think about it."},
{"type": "text", "text": "2+2 = 4"},
{"type": "text", "text": "The answer is 4."},
],
}, },
] ]
}]
assert conversation_with_thinking == expected_conversation assert conversation_with_thinking == expected_conversation
def test_apply_mistral_chat_template_thinking_chunk(): def test_apply_mistral_chat_template_thinking_chunk():
# Moved import here to avoid yapf and isort conflicts messages = [
from vllm.entrypoints.chat_utils import apply_mistral_chat_template {
messages = [{ "role": "system",
"role": "content": [
"system", {"type": "text", "text": "You are a helpful assistant."},
"content": [{ {
"type": "text",
"text": "You are a helpful assistant."
}, {
"type":
"thinking",
"closed":
True,
"thinking":
"Only return the answer when you are confident."
}]
}, {
"role": "user",
"content": "What is 2+2?"
}, {
"role":
"assistant",
"content": [{
"type": "text",
"text": "Let me think about it."
}, {
"type": "thinking", "type": "thinking",
"closed": True, "closed": True,
"thinking": "2+2 = 4" "thinking": "Only return the answer when you are confident.",
}, { },
],
},
{"role": "user", "content": "What is 2+2?"},
{
"role": "assistant",
"content": [
{"type": "text", "text": "Let me think about it."},
{"type": "thinking", "closed": True, "thinking": "2+2 = 4"},
{
"type": "text", "type": "text",
"text": "The answer is 4.", "text": "The answer is 4.",
}], },
}, { ],
"role": "user", },
"content": "Thanks, what is 3+3?" {"role": "user", "content": "Thanks, what is 3+3?"},
}] ]
# TODO(Julien): upon model release change to a tokenizer already configured. # TODO(Julien): upon model release change to a tokenizer already configured.
# ================================================================= # =================================================================
mistral_tokenizer = MistralTokenizer.from_pretrained( mistral_tokenizer = MistralTokenizer.from_pretrained(
"mistralai/Devstral-Small-2507") "mistralai/Devstral-Small-2507"
)
assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer) assert isinstance(mistral_tokenizer.tokenizer, Tekkenizer)
# Add think special tokens to the tokenizer # Add think special tokens to the tokenizer
mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo( mistral_tokenizer.tokenizer._all_special_tokens[35] = SpecialTokenInfo(
rank=35, is_control=True, token_str=SpecialTokens.begin_think.value) rank=35, is_control=True, token_str=SpecialTokens.begin_think.value
)
mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo( mistral_tokenizer.tokenizer._all_special_tokens[36] = SpecialTokenInfo(
rank=36, is_control=True, token_str=SpecialTokens.end_think.value) rank=36, is_control=True, token_str=SpecialTokens.end_think.value
)
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = { mistral_tokenizer.tokenizer._special_tokens_reverse_vocab = {
k: v k: v
for k, v in for k, v in mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab.items()
if v not in {35, 36} if v not in {35, 36}
} }
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[ mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
SpecialTokens.begin_think.value] = 35 SpecialTokens.begin_think.value
] = 35
mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[ mistral_tokenizer.tokenizer._special_tokens_reverse_vocab[
SpecialTokens.end_think.value] = 36 SpecialTokens.end_think.value
] = 36
mistral_tokenizer.instruct.BEGIN_THINK = 35 mistral_tokenizer.instruct.BEGIN_THINK = 35
mistral_tokenizer.instruct.END_THINK = 36 mistral_tokenizer.instruct.END_THINK = 36
# ================================================================= # =================================================================
tokens_ids = apply_mistral_chat_template(mistral_tokenizer, tokens_ids = apply_mistral_chat_template(
messages, mistral_tokenizer, messages, chat_template=None, tools=None
chat_template=None, )
tools=None)
string_tokens = mistral_tokenizer.mistral.decode( string_tokens = mistral_tokenizer.mistral.decode(
tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP) tokens_ids, special_token_policy=SpecialTokenPolicy.KEEP
)
expected_tokens = ( expected_tokens = (
r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the" r"<s>[SYSTEM_PROMPT]You are a helpful assistant.[THINK]Only return the"
r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]" r" answer when you are confident.[/THINK][/SYSTEM_PROMPT]"
r"[INST]What is 2+2?[/INST]" r"[INST]What is 2+2?[/INST]"
r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>" r"Let me think about it.[THINK]2+2 = 4[/THINK]The answer is 4.</s>"
r"[INST]Thanks, what is 3+3?[/INST]") r"[INST]Thanks, what is 3+3?[/INST]"
)
assert string_tokens == expected_tokens assert string_tokens == expected_tokens
...@@ -2192,37 +2172,32 @@ def test_parse_chat_messages_single_empty_audio_with_uuid( ...@@ -2192,37 +2172,32 @@ def test_parse_chat_messages_single_empty_audio_with_uuid(
): ):
audio_uuid = "abcd" audio_uuid = "abcd"
conversation, mm_data, mm_uuids = parse_chat_messages( conversation, mm_data, mm_uuids = parse_chat_messages(
[{ [
"role": {
"user", "role": "user",
"content": [ "content": [
{ {
"type": "input_audio", "type": "input_audio",
"input_audio": {}, "input_audio": {},
"uuid": audio_uuid, "uuid": audio_uuid,
}, },
{ {"type": "text", "text": "What does the audio say?"},
"type": "text", ],
"text": "What does the audio say?" }
},
], ],
}],
qwen2_audio_model_config, qwen2_audio_model_config,
qwen2_audio_tokenizer, qwen2_audio_tokenizer,
content_format="string", content_format="string",
) )
assert conversation == [{ assert conversation == [
"role": {
"user", "role": "user",
"content": "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
"Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?" }
}] ]
_assert_mm_data_inputs(mm_data, {"audio": 1}) _assert_mm_data_inputs(mm_data, {"audio": 1})
_assert_mm_uuids(mm_uuids, _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])
1,
modality="audio",
expected_uuids=[audio_uuid])
@pytest.mark.asyncio @pytest.mark.asyncio
...@@ -2232,34 +2207,29 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async( ...@@ -2232,34 +2207,29 @@ async def test_parse_chat_messages_single_empty_audio_with_uuid_async(
): ):
audio_uuid = "abcd" audio_uuid = "abcd"
conversation, mm_future, mm_uuids = parse_chat_messages_futures( conversation, mm_future, mm_uuids = parse_chat_messages_futures(
[{ [
"role": {
"user", "role": "user",
"content": [ "content": [
{ {
"type": "input_audio", "type": "input_audio",
"input_audio": {}, "input_audio": {},
"uuid": audio_uuid, "uuid": audio_uuid,
}, },
{ {"type": "text", "text": "What does the audio say?"},
"type": "text", ],
"text": "What does the audio say?" }
},
], ],
}],
qwen2_audio_model_config, qwen2_audio_model_config,
qwen2_audio_tokenizer, qwen2_audio_tokenizer,
content_format="string", content_format="string",
) )
assert conversation == [{ assert conversation == [
"role": {
"user", "role": "user",
"content": "content": "Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?",
"Audio 1: <|audio_bos|><|AUDIO|><|audio_eos|>\nWhat does the audio say?" }
}] ]
_assert_mm_data_inputs(await mm_future, {"audio": 1}) _assert_mm_data_inputs(await mm_future, {"audio": 1})
_assert_mm_uuids(mm_uuids, _assert_mm_uuids(mm_uuids, 1, modality="audio", expected_uuids=[audio_uuid])
1,
modality="audio",
expected_uuids=[audio_uuid])
...@@ -12,9 +12,6 @@ import torch ...@@ -12,9 +12,6 @@ import torch
import torch.nn.functional as F import torch.nn.functional as F
from vllm.config.lora import LoRAConfig from vllm.config.lora import LoRAConfig
# yapf conflicts with isort for this block
# yapf: disable
from vllm.lora.layers import ( from vllm.lora.layers import (
BaseLayerWithLoRA, BaseLayerWithLoRA,
ColumnParallelLinearWithLoRA, ColumnParallelLinearWithLoRA,
...@@ -32,8 +29,6 @@ from vllm.lora.layers import ( ...@@ -32,8 +29,6 @@ from vllm.lora.layers import (
RowParallelLinearWithShardedLoRA, RowParallelLinearWithShardedLoRA,
VocabParallelEmbeddingWithLoRA, VocabParallelEmbeddingWithLoRA,
) )
# yapf: enable
from vllm.lora.models import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.models import LoRALayerWeights, PackedLoRALayerWeights
from vllm.lora.punica_wrapper import get_punica_wrapper from vllm.lora.punica_wrapper import get_punica_wrapper
from vllm.model_executor.layers.linear import ( from vllm.model_executor.layers.linear import (
......
...@@ -17,8 +17,6 @@ import vllm.model_executor.model_loader.tensorizer ...@@ -17,8 +17,6 @@ import vllm.model_executor.model_loader.tensorizer
from tests.utils import VLLM_PATH, RemoteOpenAIServer from tests.utils import VLLM_PATH, RemoteOpenAIServer
from vllm import LLM, SamplingParams from vllm import LLM, SamplingParams
from vllm.engine.arg_utils import EngineArgs from vllm.engine.arg_utils import EngineArgs
# yapf: disable
from vllm.model_executor.model_loader.tensorizer import ( from vllm.model_executor.model_loader.tensorizer import (
TensorizerConfig, TensorizerConfig,
TensorSerializer, TensorSerializer,
...@@ -29,8 +27,6 @@ from vllm.model_executor.model_loader.tensorizer import ( ...@@ -29,8 +27,6 @@ from vllm.model_executor.model_loader.tensorizer import (
from vllm.model_executor.model_loader.tensorizer_loader import ( from vllm.model_executor.model_loader.tensorizer_loader import (
BLACKLISTED_TENSORIZER_ARGS, BLACKLISTED_TENSORIZER_ARGS,
) )
# yapf: enable
from vllm.utils import PlaceholderModule from vllm.utils import PlaceholderModule
from .conftest import DummyExecutor, assert_from_collective_rpc from .conftest import DummyExecutor, assert_from_collective_rpc
......
...@@ -114,7 +114,6 @@ def get_parametrized_options( ...@@ -114,7 +114,6 @@ def get_parametrized_options(
raise ValueError("Test has type CUSTOM_INPUTS, but none given") raise ValueError("Test has type CUSTOM_INPUTS, but none given")
iter_kwargs["custom_test_opts"] = test_info.custom_test_opts iter_kwargs["custom_test_opts"] = test_info.custom_test_opts
# yapf: disable
# Wrap all model cases in a pytest parameter & pass marks through # Wrap all model cases in a pytest parameter & pass marks through
return [ return [
pytest.param( pytest.param(
...@@ -122,10 +121,10 @@ def get_parametrized_options( ...@@ -122,10 +121,10 @@ def get_parametrized_options(
ExpandableVLMTestArgs( ExpandableVLMTestArgs(
**{k: v for k, v in zip(iter_kwargs.keys(), case)} **{k: v for k, v in zip(iter_kwargs.keys(), case)}
), ),
marks=test_info.marks if test_info.marks is not None else [] marks=test_info.marks if test_info.marks is not None else [],
) for case in list(itertools.product(*iter_kwargs.values())) )
for case in list(itertools.product(*iter_kwargs.values()))
] ]
# yapf: enable
# Get a list per model type, where each entry contains a tuple of all of # Get a list per model type, where each entry contains a tuple of all of
# that model type's cases, then flatten them into the top level so that # that model type's cases, then flatten them into the top level so that
......
...@@ -418,7 +418,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner: ...@@ -418,7 +418,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
self.image_size = self.vision_config.image_size self.image_size = self.vision_config.image_size
def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs): def __call__(self, text: str, images: Union[Image, list[Image]], **kwargs):
# yapf: disable
from vllm.model_executor.models.h2ovl import ( from vllm.model_executor.models.h2ovl import (
IMG_CONTEXT, IMG_CONTEXT,
IMG_END, IMG_END,
...@@ -426,7 +425,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner: ...@@ -426,7 +425,6 @@ def h2ovl_patch_hf_runner(hf_model: HfRunner) -> HfRunner:
image_to_pixel_values_h2ovl, image_to_pixel_values_h2ovl,
) )
# yapf: enable
images = [images] if isinstance(images, Image) else images images = [images] if isinstance(images, Image) else images
pixel_values = [ pixel_values = [
image_to_pixel_values_h2ovl( image_to_pixel_values_h2ovl(
......
...@@ -33,24 +33,26 @@ TEST_IMG_PLACEHOLDER = "<vlm_image>" ...@@ -33,24 +33,26 @@ TEST_IMG_PLACEHOLDER = "<vlm_image>"
TEST_VIDEO_PLACEHOLDER = "<vlm_video>" TEST_VIDEO_PLACEHOLDER = "<vlm_video>"
TEST_AUDIO_PLACEHOLDER = "<lmm_audio>" TEST_AUDIO_PLACEHOLDER = "<lmm_audio>"
# yapf: disable SINGLE_IMAGE_BASE_PROMPTS = IMAGE_ASSETS.prompts(
SINGLE_IMAGE_BASE_PROMPTS = IMAGE_ASSETS.prompts({ {
"stop_sign": f"{TEST_IMG_PLACEHOLDER}What's the content of the image?", "stop_sign": f"{TEST_IMG_PLACEHOLDER}What's the content of the image?",
"cherry_blossom": f"{TEST_IMG_PLACEHOLDER}What is the season?", "cherry_blossom": f"{TEST_IMG_PLACEHOLDER}What is the season?",
}) }
SINGLE_AUDIO_BASE_PROMPT = AUDIO_ASSETS.prompts({ )
SINGLE_AUDIO_BASE_PROMPT = AUDIO_ASSETS.prompts(
{
"mary_had_lamb": f"{TEST_AUDIO_PLACEHOLDER}Transcribe this audio into English.", # noqa: E501 "mary_had_lamb": f"{TEST_AUDIO_PLACEHOLDER}Transcribe this audio into English.", # noqa: E501
"winning_call": f"{TEST_AUDIO_PLACEHOLDER}What is happening in this audio clip?", # noqa: E501 "winning_call": f"{TEST_AUDIO_PLACEHOLDER}What is happening in this audio clip?", # noqa: E501
}) }
)
MULTI_IMAGE_BASE_PROMPT = f"Image-1: {TEST_IMG_PLACEHOLDER}Image-2: {TEST_IMG_PLACEHOLDER}Describe the two images in detail.\n" # noqa: E501 MULTI_IMAGE_BASE_PROMPT = f"Image-1: {TEST_IMG_PLACEHOLDER}Image-2: {TEST_IMG_PLACEHOLDER}Describe the two images in detail.\n" # noqa: E501
VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?" VIDEO_BASE_PROMPT = f"{TEST_VIDEO_PLACEHOLDER}Why is this video funny?"
IMAGE_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)] IMAGE_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0), (0.25, 0.5, 1.0)]
EMBEDDING_SIZE_FACTORS = [(), (1.0, ), (1.0, 1.0, 1.0)] EMBEDDING_SIZE_FACTORS = [(), (1.0,), (1.0, 1.0, 1.0)]
RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]] RunnerOutput = tuple[list[int], str, Optional[SampleLogprobs]]
# yapf: enable
class PromptWithMultiModalInput(NamedTuple): class PromptWithMultiModalInput(NamedTuple):
......
...@@ -322,8 +322,9 @@ def _test_processing_correctness_one( ...@@ -322,8 +322,9 @@ def _test_processing_correctness_one(
) )
# yapf: disable @pytest.mark.parametrize(
@pytest.mark.parametrize("model_id", [ "model_id",
[
"rhymes-ai/Aria", "rhymes-ai/Aria",
"CohereForAI/aya-vision-8b", "CohereForAI/aya-vision-8b",
"Salesforce/blip2-opt-2.7b", "Salesforce/blip2-opt-2.7b",
...@@ -391,11 +392,11 @@ def _test_processing_correctness_one( ...@@ -391,11 +392,11 @@ def _test_processing_correctness_one(
"omni-research/Tarsier-7b", "omni-research/Tarsier-7b",
"omni-research/Tarsier2-Recap-7b", "omni-research/Tarsier2-Recap-7b",
"mistralai/Voxtral-Mini-3B-2507", "mistralai/Voxtral-Mini-3B-2507",
]) ],
)
@pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0]) @pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0])
@pytest.mark.parametrize("num_batches", [32]) @pytest.mark.parametrize("num_batches", [32])
@pytest.mark.parametrize("simplify_rate", [1.0]) @pytest.mark.parametrize("simplify_rate", [1.0])
# yapf: enable
def test_processing_correctness( def test_processing_correctness(
model_id: str, model_id: str,
hit_rate: float, hit_rate: float,
......
...@@ -12,7 +12,6 @@ from ...utils import build_model_context ...@@ -12,7 +12,6 @@ from ...utils import build_model_context
@pytest.mark.parametrize("model_id", ["HuggingFaceM4/Idefics3-8B-Llama3"]) @pytest.mark.parametrize("model_id", ["HuggingFaceM4/Idefics3-8B-Llama3"])
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("mm_processor_kwargs", "expected_toks_per_img"), ("mm_processor_kwargs", "expected_toks_per_img"),
[ [
...@@ -20,7 +19,6 @@ from ...utils import build_model_context ...@@ -20,7 +19,6 @@ from ...utils import build_model_context
({"size": {"longest_edge": 728}}, 169 * (2**2 + 1)), ({"size": {"longest_edge": 728}}, 169 * (2**2 + 1)),
], ],
) )
# yapf: enable
@pytest.mark.parametrize("num_imgs", [1, 2]) @pytest.mark.parametrize("num_imgs", [1, 2])
@pytest.mark.parametrize("kwargs_on_init", [True, False]) @pytest.mark.parametrize("kwargs_on_init", [True, False])
def test_processor_override( def test_processor_override(
......
...@@ -11,7 +11,6 @@ from ...utils import build_model_context ...@@ -11,7 +11,6 @@ from ...utils import build_model_context
@pytest.mark.parametrize("model_id", ["microsoft/Phi-3.5-vision-instruct"]) @pytest.mark.parametrize("model_id", ["microsoft/Phi-3.5-vision-instruct"])
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("mm_processor_kwargs", "expected_toks_per_img"), ("mm_processor_kwargs", "expected_toks_per_img"),
[ [
...@@ -21,7 +20,6 @@ from ...utils import build_model_context ...@@ -21,7 +20,6 @@ from ...utils import build_model_context
({}, 757), ({}, 757),
], ],
) )
# yapf: enable
@pytest.mark.parametrize("num_imgs", [1, 2]) @pytest.mark.parametrize("num_imgs", [1, 2])
@pytest.mark.parametrize("kwargs_on_init", [True, False]) @pytest.mark.parametrize("kwargs_on_init", [True, False])
def test_processor_override( def test_processor_override(
......
...@@ -11,7 +11,6 @@ from ...utils import build_model_context ...@@ -11,7 +11,6 @@ from ...utils import build_model_context
@pytest.mark.parametrize("model_id", ["microsoft/Phi-4-multimodal-instruct"]) @pytest.mark.parametrize("model_id", ["microsoft/Phi-4-multimodal-instruct"])
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("mm_processor_kwargs", "expected_toks_per_img"), ("mm_processor_kwargs", "expected_toks_per_img"),
[ [
...@@ -21,7 +20,6 @@ from ...utils import build_model_context ...@@ -21,7 +20,6 @@ from ...utils import build_model_context
({}, 9585), ({}, 9585),
], ],
) )
# yapf: enable
@pytest.mark.parametrize("num_imgs", [1, 2]) @pytest.mark.parametrize("num_imgs", [1, 2])
@pytest.mark.parametrize("kwargs_on_init", [True, False]) @pytest.mark.parametrize("kwargs_on_init", [True, False])
def test_processor_override( def test_processor_override(
......
...@@ -10,7 +10,6 @@ from ...utils import build_model_context ...@@ -10,7 +10,6 @@ from ...utils import build_model_context
@pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"]) @pytest.mark.parametrize("model_id", ["Qwen/Qwen2-VL-2B-Instruct"])
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("mm_processor_kwargs", "expected_toks_per_img", "expected_pixels_shape"), ("mm_processor_kwargs", "expected_toks_per_img", "expected_pixels_shape"),
[ [
...@@ -18,7 +17,6 @@ from ...utils import build_model_context ...@@ -18,7 +17,6 @@ from ...utils import build_model_context
({"min_pixels": 64**2, "max_pixels": 512**2}, 330, (1320, 1176)), ({"min_pixels": 64**2, "max_pixels": 512**2}, 330, (1320, 1176)),
], ],
) )
# yapf: enable
@pytest.mark.parametrize("num_imgs", [1, 2]) @pytest.mark.parametrize("num_imgs", [1, 2])
@pytest.mark.parametrize("kwargs_on_init", [True, False]) @pytest.mark.parametrize("kwargs_on_init", [True, False])
def test_processor_override( def test_processor_override(
......
...@@ -12,7 +12,6 @@ from ...utils import build_model_context ...@@ -12,7 +12,6 @@ from ...utils import build_model_context
@pytest.mark.parametrize("model_id", ["HuggingFaceTB/SmolVLM2-2.2B-Instruct"]) @pytest.mark.parametrize("model_id", ["HuggingFaceTB/SmolVLM2-2.2B-Instruct"])
# yapf: disable
@pytest.mark.parametrize( @pytest.mark.parametrize(
("mm_processor_kwargs", "expected_toks_per_img"), ("mm_processor_kwargs", "expected_toks_per_img"),
[ [
...@@ -20,7 +19,6 @@ from ...utils import build_model_context ...@@ -20,7 +19,6 @@ from ...utils import build_model_context
({"max_image_size": {"longest_edge": 768}}, 405), ({"max_image_size": {"longest_edge": 768}}, 405),
], ],
) )
# yapf: enable
@pytest.mark.parametrize("num_imgs", [1, 2]) @pytest.mark.parametrize("num_imgs", [1, 2])
@pytest.mark.parametrize("kwargs_on_init", [True, False]) @pytest.mark.parametrize("kwargs_on_init", [True, False])
def test_processor_override( def test_processor_override(
......
...@@ -7,9 +7,7 @@ from vllm.config import ModelConfig ...@@ -7,9 +7,7 @@ from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
# yapf: disable @pytest.mark.parametrize("model_id", ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
@pytest.mark.parametrize("model_id",
["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
def test_multimodal_processor(model_id): def test_multimodal_processor(model_id):
model_config = ModelConfig( model_config = ModelConfig(
model=model_id, model=model_id,
...@@ -18,7 +16,7 @@ def test_multimodal_processor(model_id): ...@@ -18,7 +16,7 @@ def test_multimodal_processor(model_id):
mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config) mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
image_pil = ImageAsset('cherry_blossom').pil_image image_pil = ImageAsset("cherry_blossom").pil_image
mm_data = {"image": image_pil} mm_data = {"image": image_pil}
str_prompt = "<|im_start|>user <image>\nWhat is the content of this image?<|im_end|><|im_start|>assistant\n" # noqa: E501 str_prompt = "<|im_start|>user <image>\nWhat is the content of this image?<|im_end|><|im_start|>assistant\n" # noqa: E501
str_processed_inputs = mm_processor.apply( str_processed_inputs = mm_processor.apply(
...@@ -28,8 +26,23 @@ def test_multimodal_processor(model_id): ...@@ -28,8 +26,23 @@ def test_multimodal_processor(model_id):
) )
ids_prompt = [ ids_prompt = [
151644, 872, 220, 151646, 198, 3838, 374, 279, 2213, 315, 419, 2168, 151644,
30, 151645, 151644, 77091, 198 872,
220,
151646,
198,
3838,
374,
279,
2213,
315,
419,
2168,
30,
151645,
151644,
77091,
198,
] ]
ids_processed_inputs = mm_processor.apply( ids_processed_inputs = mm_processor.apply(
prompt=ids_prompt, prompt=ids_prompt,
...@@ -37,5 +50,7 @@ def test_multimodal_processor(model_id): ...@@ -37,5 +50,7 @@ def test_multimodal_processor(model_id):
hf_processor_mm_kwargs={}, hf_processor_mm_kwargs={},
) )
assert (str_processed_inputs["prompt_token_ids"] assert (
== ids_processed_inputs["prompt_token_ids"]) str_processed_inputs["prompt_token_ids"]
== ids_processed_inputs["prompt_token_ids"]
)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment