Unverified commit e83b7e37, authored by Cyrus Leung, committed by GitHub
Browse files

Revert "[Renderer] Separate out `RendererConfig` from `ModelConfig` (#30145)" (#30199)

parent 27f4c2fd
...@@ -162,7 +162,7 @@ def test_processor_override( ...@@ -162,7 +162,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": len(size_factors)}, limit_mm_per_prompt={"image": len(size_factors)},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
min_num = min_dynamic_patch if dynamic_image_size else 1 min_num = min_dynamic_patch if dynamic_image_size else 1
......
...@@ -38,7 +38,7 @@ def test_processor_override( ...@@ -38,7 +38,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass # Build the image str / prompt based on the number of images we pass
......
...@@ -116,7 +116,7 @@ def test_processor_override( ...@@ -116,7 +116,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": len(size_factors)}, limit_mm_per_prompt={"image": len(size_factors)},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
min_num = min_dynamic_patch if dynamic_image_size else 1 min_num = min_dynamic_patch if dynamic_image_size else 1
......
...@@ -30,7 +30,7 @@ def test_processor_override( ...@@ -30,7 +30,7 @@ def test_processor_override(
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
mm_processor_cache_gb=mm_processor_cache_gb, mm_processor_cache_gb=mm_processor_cache_gb,
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
config = processor.info.get_hf_config() config = processor.info.get_hf_config()
tokenizer = processor.info.get_tokenizer() tokenizer = processor.info.get_tokenizer()
hf_processor = processor.info.get_hf_processor() hf_processor = processor.info.get_hf_processor()
......
...@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id): ...@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": 1}, limit_mm_per_prompt={"image": 1},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
info = processor.info info = processor.info
seen_aspect_ratios = set[float]() seen_aspect_ratios = set[float]()
...@@ -140,7 +140,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs): ...@@ -140,7 +140,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
image_ratios = [ image_ratios = [
(171, 152), (171, 152),
...@@ -173,7 +173,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs): ...@@ -173,7 +173,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
seen_aspect_ratios = set[float]() seen_aspect_ratios = set[float]()
image_sizes = list[ImageSize]() image_sizes = list[ImageSize]()
......
...@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id): ...@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": 1}, limit_mm_per_prompt={"image": 1},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
info = processor.info info = processor.info
seen_aspect_ratios = set[float]() seen_aspect_ratios = set[float]()
...@@ -138,7 +138,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs): ...@@ -138,7 +138,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
image_ratios = [ image_ratios = [
(171, 152), (171, 152),
...@@ -171,7 +171,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs): ...@@ -171,7 +171,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
seen_aspect_ratios = set[float]() seen_aspect_ratios = set[float]()
image_sizes = list[ImageSize]() image_sizes = list[ImageSize]()
......
...@@ -24,7 +24,7 @@ def test_processor_override( ...@@ -24,7 +24,7 @@ def test_processor_override(
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
prompt = "<image>" * num_imgs prompt = "<image>" * num_imgs
image = Image.new("RGB", size=(364, 364)) image = Image.new("RGB", size=(364, 364))
mm_data = {"image": [image] * num_imgs} mm_data = {"image": [image] * num_imgs}
...@@ -83,7 +83,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs): ...@@ -83,7 +83,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs=None, mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
image_ratios = [ image_ratios = [
(171, 152), (171, 152),
......
...@@ -25,7 +25,7 @@ def test_profiling(model_id: str, max_model_len: int): ...@@ -25,7 +25,7 @@ def test_profiling(model_id: str, max_model_len: int):
limit_mm_per_prompt=mm_counts, limit_mm_per_prompt=mm_counts,
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
profiler = MultiModalProfiler(processor) profiler = MultiModalProfiler(processor)
decoder_dummy_data = profiler.get_decoder_dummy_data( decoder_dummy_data = profiler.get_decoder_dummy_data(
......
...@@ -118,7 +118,7 @@ def test_processor_override( ...@@ -118,7 +118,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": len(size_factors)}, limit_mm_per_prompt={"image": len(size_factors)},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
min_num = min_dynamic_patch if dynamic_image_size else 1 min_num = min_dynamic_patch if dynamic_image_size else 1
......
...@@ -39,7 +39,7 @@ def test_processor_override( ...@@ -39,7 +39,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass # Build the image str / prompt based on the number of images we pass
......
...@@ -39,7 +39,7 @@ def test_processor_override( ...@@ -39,7 +39,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass # Build the image str / prompt based on the number of images we pass
......
...@@ -34,7 +34,7 @@ def test_processor_override( ...@@ -34,7 +34,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
tokenizer = processor.info.get_tokenizer() tokenizer = processor.info.get_tokenizer()
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
......
...@@ -38,7 +38,7 @@ def test_processor_override( ...@@ -38,7 +38,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None, mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs}, limit_mm_per_prompt={"image": num_imgs},
) )
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass # Build the image str / prompt based on the number of images we pass
......
...@@ -11,7 +11,7 @@ import pytest ...@@ -11,7 +11,7 @@ import pytest
import torch.nn as nn import torch.nn as nn
from PIL import Image from PIL import Image
from vllm.config import ModelConfig, RendererConfig, VllmConfig, set_current_vllm_config from vllm.config import ModelConfig, VllmConfig, set_current_vllm_config
from vllm.config.multimodal import ( from vllm.config.multimodal import (
AudioDummyOptions, AudioDummyOptions,
BaseDummyOptions, BaseDummyOptions,
...@@ -31,6 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs ...@@ -31,6 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
from vllm.multimodal.utils import group_mm_kwargs_by_modality from vllm.multimodal.utils import group_mm_kwargs_by_modality
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.tokenizers import cached_tokenizer_from_config
from vllm.utils.collection_utils import is_list_of from vllm.utils.collection_utils import is_list_of
from vllm.utils.torch_utils import set_default_torch_dtype from vllm.utils.torch_utils import set_default_torch_dtype
...@@ -149,10 +150,7 @@ def initialize_dummy_model( ...@@ -149,10 +150,7 @@ def initialize_dummy_model(
backend="nccl", backend="nccl",
) )
initialize_model_parallel(tensor_model_parallel_size=1) initialize_model_parallel(tensor_model_parallel_size=1)
vllm_config = VllmConfig( vllm_config = VllmConfig(model_config=model_config)
model_config=model_config,
renderer_config=RendererConfig(model_config=model_config),
)
with set_current_vllm_config(vllm_config=vllm_config): with set_current_vllm_config(vllm_config=vllm_config):
with set_default_torch_dtype(model_config.dtype): with set_default_torch_dtype(model_config.dtype):
model = model_cls(vllm_config=vllm_config) model = model_cls(vllm_config=vllm_config)
...@@ -184,12 +182,19 @@ def test_model_tensor_schema(model_id: str): ...@@ -184,12 +182,19 @@ def test_model_tensor_schema(model_id: str):
else: else:
dtype = model_info.dtype dtype = model_info.dtype
renderer_config = model_info.build_renderer_config( model_config = ModelConfig(
model_id, model_id,
tokenizer=model_info.tokenizer or model_id,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=hf_overrides_fn, hf_overrides=hf_overrides_fn,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=dtype, dtype=dtype,
) )
model_config = renderer_config.model_config
model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config) model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
assert supports_multimodal(model_cls) assert supports_multimodal(model_cls)
...@@ -207,7 +212,10 @@ def test_model_tensor_schema(model_id: str): ...@@ -207,7 +212,10 @@ def test_model_tensor_schema(model_id: str):
if not any(inputs_parse_methods): if not any(inputs_parse_methods):
pytest.skip(f"{model_arch} does not support tensor schema validation.") pytest.skip(f"{model_arch} does not support tensor schema validation.")
ctx = InputProcessingContext.from_config(renderer_config) ctx = InputProcessingContext(
model_config,
tokenizer=cached_tokenizer_from_config(model_config),
)
processing_info = factories.info(ctx) processing_info = factories.info(ctx)
supported_mm_limits = processing_info.get_supported_mm_limits() supported_mm_limits = processing_info.get_supported_mm_limits()
limit_mm_per_prompt = { limit_mm_per_prompt = {
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
import pytest import pytest
from vllm.assets.image import ImageAsset from vllm.assets.image import ImageAsset
from vllm.config import ModelConfig, RendererConfig from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
...@@ -13,9 +13,8 @@ def test_multimodal_processor(model_id): ...@@ -13,9 +13,8 @@ def test_multimodal_processor(model_id):
model=model_id, model=model_id,
model_impl="transformers", model_impl="transformers",
) )
renderer_config = RendererConfig(model_config=model_config)
mm_processor = MULTIMODAL_REGISTRY.create_processor(renderer_config) mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
image_pil = ImageAsset("cherry_blossom").pil_image image_pil = ImageAsset("cherry_blossom").pil_image
mm_data = {"image": image_pil} mm_data = {"image": image_pil}
......
...@@ -7,6 +7,7 @@ import torch ...@@ -7,6 +7,7 @@ import torch
import transformers import transformers
from transformers import AutoConfig, PreTrainedModel from transformers import AutoConfig, PreTrainedModel
from vllm.config import ModelConfig
from vllm.model_executor.models.utils import WeightsMapper from vllm.model_executor.models.utils import WeightsMapper
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.transformers_utils.config import try_get_safetensors_metadata from vllm.transformers_utils.config import try_get_safetensors_metadata
...@@ -49,11 +50,37 @@ def test_hf_model_weights_mapper(model_arch: str): ...@@ -49,11 +50,37 @@ def test_hf_model_weights_mapper(model_arch: str):
model_info.check_available_online(on_fail="skip") model_info.check_available_online(on_fail="skip")
model_info.check_transformers_version(on_fail="skip") model_info.check_transformers_version(on_fail="skip")
model_config = model_info.build_model_config(config_format="hf") is_mistral_model = model_arch in [
"Mistral3ForConditionalGeneration",
"PixtralForConditionalGeneration",
"VoxtralForConditionalGeneration",
]
if not is_mistral_model or model_info.tokenizer_mode == "mistral":
tokenizer_mode = model_info.tokenizer_mode
else:
tokenizer_mode = "hf"
model_id = model_info.default
model_config = ModelConfig(
model_id,
tokenizer=model_info.tokenizer or model_id,
tokenizer_mode=tokenizer_mode,
config_format="hf",
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype,
)
model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config) model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
original_weights = create_repo_dummy_weights(model_config.model) original_weights = create_repo_dummy_weights(model_id)
hf_dummy_model = create_dummy_model(model_config.model, model_arch) hf_dummy_model = create_dummy_model(model_id, model_arch)
hf_converted_weights = hf_dummy_model.named_parameters() hf_converted_weights = hf_dummy_model.named_parameters()
hf_converted_buffers = hf_dummy_model.named_buffers() hf_converted_buffers = hf_dummy_model.named_buffers()
mapper: WeightsMapper = model_cls.hf_to_vllm_mapper mapper: WeightsMapper = model_cls.hf_to_vllm_mapper
......
...@@ -9,8 +9,7 @@ import pytest ...@@ -9,8 +9,7 @@ import pytest
from packaging.version import Version from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION from transformers import __version__ as TRANSFORMERS_VERSION
from vllm.config.model import ModelConfig, ModelDType from vllm.config.model import ModelDType, TokenizerMode
from vllm.config.renderer import RendererConfig, TokenizerMode
@dataclass(frozen=True) @dataclass(frozen=True)
...@@ -171,36 +170,6 @@ class _HfExamplesInfo: ...@@ -171,36 +170,6 @@ class _HfExamplesInfo:
else: else:
pytest.skip(msg) pytest.skip(msg)
def build_model_config(self, model: str | None = None, **kwargs) -> ModelConfig:
if model is None:
model = self.default
return ModelConfig(
**{
"model": model,
"revision": self.revision,
"trust_remote_code": self.trust_remote_code,
"hf_overrides": self.hf_overrides,
"enable_prompt_embeds": self.require_embed_inputs,
"enable_mm_embeds": self.require_embed_inputs,
"enforce_eager": self.enforce_eager,
"dtype": self.dtype,
**kwargs,
}
)
def build_renderer_config(
self, model: str | None = None, **kwargs
) -> RendererConfig:
model_config = self.build_model_config(model, **kwargs)
return RendererConfig(
model_config=model_config,
tokenizer=self.tokenizer or model_config.model,
tokenizer_mode=self.tokenizer_mode,
skip_tokenizer_init=self.require_embed_inputs,
)
_TEXT_GENERATION_EXAMPLE_MODELS = { _TEXT_GENERATION_EXAMPLE_MODELS = {
# [Decoder-only] # [Decoder-only]
......
...@@ -13,6 +13,7 @@ from transformers import PretrainedConfig ...@@ -13,6 +13,7 @@ from transformers import PretrainedConfig
from vllm.config.model import ModelConfig, ModelDType, RunnerOption from vllm.config.model import ModelConfig, ModelDType, RunnerOption
from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
from vllm.multimodal.processing import InputProcessingContext from vllm.multimodal.processing import InputProcessingContext
from vllm.tokenizers import cached_tokenizer_from_config
from .. import ci_envs from .. import ci_envs
from .registry import HF_EXAMPLE_MODELS from .registry import HF_EXAMPLE_MODELS
...@@ -295,18 +296,30 @@ def build_model_context( ...@@ -295,18 +296,30 @@ def build_model_context(
model_config_kwargs = model_config_kwargs or {} model_config_kwargs = model_config_kwargs or {}
limit_mm_per_prompt = limit_mm_per_prompt or {} limit_mm_per_prompt = limit_mm_per_prompt or {}
renderer_config = model_info.build_renderer_config( model_config = ModelConfig(
model_id, model_id,
runner=runner, runner=runner,
tokenizer=model_info.tokenizer or model_id,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
dtype=dtype, dtype=dtype,
seed=0, seed=0,
mm_processor_kwargs=mm_processor_kwargs, mm_processor_kwargs=mm_processor_kwargs,
limit_mm_per_prompt=limit_mm_per_prompt, limit_mm_per_prompt=limit_mm_per_prompt,
mm_processor_cache_gb=mm_processor_cache_gb, mm_processor_cache_gb=mm_processor_cache_gb,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
**model_config_kwargs, **model_config_kwargs,
) )
return InputProcessingContext.from_config(renderer_config) return InputProcessingContext(
model_config,
tokenizer=cached_tokenizer_from_config(model_config),
)
def check_embeddings_close( def check_embeddings_close(
......
...@@ -6,7 +6,7 @@ import numpy as np ...@@ -6,7 +6,7 @@ import numpy as np
import pytest import pytest
import torch import torch
from vllm.config import ModelConfig, ParallelConfig, RendererConfig, VllmConfig from vllm.config import ModelConfig, ParallelConfig, VllmConfig
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.cache import ( from vllm.multimodal.cache import (
BaseMultiModalProcessorCache, BaseMultiModalProcessorCache,
...@@ -110,14 +110,11 @@ def _create_vllm_config( ...@@ -110,14 +110,11 @@ def _create_vllm_config(
mm_processor_cache_gb: float, mm_processor_cache_gb: float,
enable_ipc: bool, enable_ipc: bool,
): ):
model_config = ModelConfig(
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_gb=mm_processor_cache_gb,
)
return VllmConfig( return VllmConfig(
model_config=model_config, model_config=ModelConfig(
renderer_config=RendererConfig(model_config=model_config), model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_gb=mm_processor_cache_gb,
),
parallel_config=ParallelConfig(data_parallel_size=1 if enable_ipc else 2), parallel_config=ParallelConfig(data_parallel_size=1 if enable_ipc else 2),
) )
...@@ -509,15 +506,13 @@ def _run_test_cache_eviction_shm( ...@@ -509,15 +506,13 @@ def _run_test_cache_eviction_shm(
def test_cache_eviction_shm_cache(): def test_cache_eviction_shm_cache():
model_config = ModelConfig(
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_type="shm",
mm_shm_cache_max_object_size_mb=6,
mm_processor_cache_gb=15.2 * MiB_bytes / GiB_bytes,
)
vllm_config = VllmConfig( vllm_config = VllmConfig(
model_config=model_config, model_config=ModelConfig(
renderer_config=RendererConfig(model_config=model_config), model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_type="shm",
mm_shm_cache_max_object_size_mb=6,
mm_processor_cache_gb=15.2 * MiB_bytes / GiB_bytes,
),
) )
sender_cache = ShmObjectStoreSenderCache(vllm_config) sender_cache = ShmObjectStoreSenderCache(vllm_config)
receiver_cache = ShmObjectStoreReceiverCache(vllm_config, mp.Lock()) receiver_cache = ShmObjectStoreReceiverCache(vllm_config, mp.Lock())
......
...@@ -7,7 +7,7 @@ from contextlib import nullcontext ...@@ -7,7 +7,7 @@ from contextlib import nullcontext
import numpy as np import numpy as np
import pytest import pytest
from vllm.config import ModelConfig, RendererConfig from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.processing import ( from vllm.multimodal.processing import (
InputProcessingContext, InputProcessingContext,
...@@ -920,9 +920,8 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid): ...@@ -920,9 +920,8 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
model=model_id, model=model_id,
limit_mm_per_prompt=limit_mm_per_prompt, limit_mm_per_prompt=limit_mm_per_prompt,
) )
renderer_config = RendererConfig(model_config=model_config)
processor = MULTIMODAL_REGISTRY.create_processor(renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(model_config)
processor._supported_mm_limits = {"image": num_supported} processor._supported_mm_limits = {"image": num_supported}
profiler = MultiModalProfiler(processor) profiler = MultiModalProfiler(processor)
...@@ -956,9 +955,8 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid): ...@@ -956,9 +955,8 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
model=model_id, model=model_id,
limit_mm_per_prompt=limit_mm_per_prompt, limit_mm_per_prompt=limit_mm_per_prompt,
) )
renderer_config = RendererConfig(model_config=model_config)
processor = MULTIMODAL_REGISTRY.create_processor(renderer_config) processor = MULTIMODAL_REGISTRY.create_processor(model_config)
rng = np.random.RandomState(0) rng = np.random.RandomState(0)
image = random_image(rng, min_wh=128, max_wh=256) image = random_image(rng, min_wh=128, max_wh=256)
...@@ -1014,13 +1012,11 @@ def test_hf_processor_init_kwargs( ...@@ -1014,13 +1012,11 @@ def test_hf_processor_init_kwargs(
inference_kwargs, inference_kwargs,
expected_kwargs, expected_kwargs,
): ):
model_config = ModelConfig(model_id, mm_processor_kwargs=config_kwargs) ctx = InputProcessingContext(
renderer_config = RendererConfig( model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
model_config=model_config, tokenizer=None,
tokenizer=model_id,
) )
ctx = InputProcessingContext.from_config(renderer_config)
processor = ctx.get_hf_processor( processor = ctx.get_hf_processor(
DummyProcessor, # type: ignore[arg-type] DummyProcessor, # type: ignore[arg-type]
**inference_kwargs, **inference_kwargs,
...@@ -1049,13 +1045,11 @@ def test_hf_processor_call_kwargs( ...@@ -1049,13 +1045,11 @@ def test_hf_processor_call_kwargs(
inference_kwargs, inference_kwargs,
expected_kwargs, expected_kwargs,
): ):
model_config = ModelConfig(model_id, mm_processor_kwargs=config_kwargs) ctx = InputProcessingContext(
renderer_config = RendererConfig( model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
model_config=model_config, tokenizer=None,
tokenizer=model_id,
) )
ctx = InputProcessingContext.from_config(renderer_config)
processor = ctx.get_hf_processor(DummyProcessor) # type: ignore[arg-type] processor = ctx.get_hf_processor(DummyProcessor) # type: ignore[arg-type]
result = ctx.call_hf_processor(processor, {}, inference_kwargs) result = ctx.call_hf_processor(processor, {}, inference_kwargs)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment