Unverified Commit e83b7e37 authored by Cyrus Leung's avatar Cyrus Leung Committed by GitHub
Browse files

Revert "[Renderer] Separate out `RendererConfig` from `ModelConfig` (#30145)" (#30199)

parent 27f4c2fd
......@@ -162,7 +162,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": len(size_factors)},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
min_num = min_dynamic_patch if dynamic_image_size else 1
......
......@@ -38,7 +38,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
......
......@@ -116,7 +116,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": len(size_factors)},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
min_num = min_dynamic_patch if dynamic_image_size else 1
......
......@@ -30,7 +30,7 @@ def test_processor_override(
limit_mm_per_prompt={"image": num_imgs},
mm_processor_cache_gb=mm_processor_cache_gb,
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
config = processor.info.get_hf_config()
tokenizer = processor.info.get_tokenizer()
hf_processor = processor.info.get_hf_processor()
......
......@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": 1},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
info = processor.info
seen_aspect_ratios = set[float]()
......@@ -140,7 +140,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
image_ratios = [
(171, 152),
......@@ -173,7 +173,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
seen_aspect_ratios = set[float]()
image_sizes = list[ImageSize]()
......
......@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": 1},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
info = processor.info
seen_aspect_ratios = set[float]()
......@@ -138,7 +138,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
image_ratios = [
(171, 152),
......@@ -171,7 +171,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
seen_aspect_ratios = set[float]()
image_sizes = list[ImageSize]()
......
......@@ -24,7 +24,7 @@ def test_processor_override(
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
prompt = "<image>" * num_imgs
image = Image.new("RGB", size=(364, 364))
mm_data = {"image": [image] * num_imgs}
......@@ -83,7 +83,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs=None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
image_ratios = [
(171, 152),
......
......@@ -25,7 +25,7 @@ def test_profiling(model_id: str, max_model_len: int):
limit_mm_per_prompt=mm_counts,
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
profiler = MultiModalProfiler(processor)
decoder_dummy_data = profiler.get_decoder_dummy_data(
......
......@@ -118,7 +118,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": len(size_factors)},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
min_num = min_dynamic_patch if dynamic_image_size else 1
......
......@@ -39,7 +39,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
......
......@@ -39,7 +39,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
......
......@@ -34,7 +34,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
tokenizer = processor.info.get_tokenizer()
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
......
......@@ -38,7 +38,7 @@ def test_processor_override(
mm_processor_kwargs=mm_processor_kwargs if kwargs_on_init else None,
limit_mm_per_prompt={"image": num_imgs},
)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(ctx.model_config)
hf_processor_mm_kwargs = {} if kwargs_on_init else mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
......
......@@ -11,7 +11,7 @@ import pytest
import torch.nn as nn
from PIL import Image
from vllm.config import ModelConfig, RendererConfig, VllmConfig, set_current_vllm_config
from vllm.config import ModelConfig, VllmConfig, set_current_vllm_config
from vllm.config.multimodal import (
AudioDummyOptions,
BaseDummyOptions,
......@@ -31,6 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
from vllm.multimodal.processing import BaseMultiModalProcessor, InputProcessingContext
from vllm.multimodal.utils import group_mm_kwargs_by_modality
from vllm.platforms import current_platform
from vllm.tokenizers import cached_tokenizer_from_config
from vllm.utils.collection_utils import is_list_of
from vllm.utils.torch_utils import set_default_torch_dtype
......@@ -149,10 +150,7 @@ def initialize_dummy_model(
backend="nccl",
)
initialize_model_parallel(tensor_model_parallel_size=1)
vllm_config = VllmConfig(
model_config=model_config,
renderer_config=RendererConfig(model_config=model_config),
)
vllm_config = VllmConfig(model_config=model_config)
with set_current_vllm_config(vllm_config=vllm_config):
with set_default_torch_dtype(model_config.dtype):
model = model_cls(vllm_config=vllm_config)
......@@ -184,12 +182,19 @@ def test_model_tensor_schema(model_id: str):
else:
dtype = model_info.dtype
renderer_config = model_info.build_renderer_config(
model_config = ModelConfig(
model_id,
tokenizer=model_info.tokenizer or model_id,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=hf_overrides_fn,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=dtype,
)
model_config = renderer_config.model_config
model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
assert supports_multimodal(model_cls)
......@@ -207,7 +212,10 @@ def test_model_tensor_schema(model_id: str):
if not any(inputs_parse_methods):
pytest.skip(f"{model_arch} does not support tensor schema validation.")
ctx = InputProcessingContext.from_config(renderer_config)
ctx = InputProcessingContext(
model_config,
tokenizer=cached_tokenizer_from_config(model_config),
)
processing_info = factories.info(ctx)
supported_mm_limits = processing_info.get_supported_mm_limits()
limit_mm_per_prompt = {
......
......@@ -3,7 +3,7 @@
import pytest
from vllm.assets.image import ImageAsset
from vllm.config import ModelConfig, RendererConfig
from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
......@@ -13,9 +13,8 @@ def test_multimodal_processor(model_id):
model=model_id,
model_impl="transformers",
)
renderer_config = RendererConfig(model_config=model_config)
mm_processor = MULTIMODAL_REGISTRY.create_processor(renderer_config)
mm_processor = MULTIMODAL_REGISTRY.create_processor(model_config)
image_pil = ImageAsset("cherry_blossom").pil_image
mm_data = {"image": image_pil}
......
......@@ -7,6 +7,7 @@ import torch
import transformers
from transformers import AutoConfig, PreTrainedModel
from vllm.config import ModelConfig
from vllm.model_executor.models.utils import WeightsMapper
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.transformers_utils.config import try_get_safetensors_metadata
......@@ -49,11 +50,37 @@ def test_hf_model_weights_mapper(model_arch: str):
model_info.check_available_online(on_fail="skip")
model_info.check_transformers_version(on_fail="skip")
model_config = model_info.build_model_config(config_format="hf")
is_mistral_model = model_arch in [
"Mistral3ForConditionalGeneration",
"PixtralForConditionalGeneration",
"VoxtralForConditionalGeneration",
]
if not is_mistral_model or model_info.tokenizer_mode == "mistral":
tokenizer_mode = model_info.tokenizer_mode
else:
tokenizer_mode = "hf"
model_id = model_info.default
model_config = ModelConfig(
model_id,
tokenizer=model_info.tokenizer or model_id,
tokenizer_mode=tokenizer_mode,
config_format="hf",
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
dtype=model_info.dtype,
)
model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config)
original_weights = create_repo_dummy_weights(model_config.model)
hf_dummy_model = create_dummy_model(model_config.model, model_arch)
original_weights = create_repo_dummy_weights(model_id)
hf_dummy_model = create_dummy_model(model_id, model_arch)
hf_converted_weights = hf_dummy_model.named_parameters()
hf_converted_buffers = hf_dummy_model.named_buffers()
mapper: WeightsMapper = model_cls.hf_to_vllm_mapper
......
......@@ -9,8 +9,7 @@ import pytest
from packaging.version import Version
from transformers import __version__ as TRANSFORMERS_VERSION
from vllm.config.model import ModelConfig, ModelDType
from vllm.config.renderer import RendererConfig, TokenizerMode
from vllm.config.model import ModelDType, TokenizerMode
@dataclass(frozen=True)
......@@ -171,36 +170,6 @@ class _HfExamplesInfo:
else:
pytest.skip(msg)
def build_model_config(self, model: str | None = None, **kwargs) -> ModelConfig:
if model is None:
model = self.default
return ModelConfig(
**{
"model": model,
"revision": self.revision,
"trust_remote_code": self.trust_remote_code,
"hf_overrides": self.hf_overrides,
"enable_prompt_embeds": self.require_embed_inputs,
"enable_mm_embeds": self.require_embed_inputs,
"enforce_eager": self.enforce_eager,
"dtype": self.dtype,
**kwargs,
}
)
def build_renderer_config(
self, model: str | None = None, **kwargs
) -> RendererConfig:
model_config = self.build_model_config(model, **kwargs)
return RendererConfig(
model_config=model_config,
tokenizer=self.tokenizer or model_config.model,
tokenizer_mode=self.tokenizer_mode,
skip_tokenizer_init=self.require_embed_inputs,
)
_TEXT_GENERATION_EXAMPLE_MODELS = {
# [Decoder-only]
......
......@@ -13,6 +13,7 @@ from transformers import PretrainedConfig
from vllm.config.model import ModelConfig, ModelDType, RunnerOption
from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs
from vllm.multimodal.processing import InputProcessingContext
from vllm.tokenizers import cached_tokenizer_from_config
from .. import ci_envs
from .registry import HF_EXAMPLE_MODELS
......@@ -295,18 +296,30 @@ def build_model_context(
model_config_kwargs = model_config_kwargs or {}
limit_mm_per_prompt = limit_mm_per_prompt or {}
renderer_config = model_info.build_renderer_config(
model_config = ModelConfig(
model_id,
runner=runner,
tokenizer=model_info.tokenizer or model_id,
tokenizer_mode=model_info.tokenizer_mode,
revision=model_info.revision,
trust_remote_code=model_info.trust_remote_code,
dtype=dtype,
seed=0,
mm_processor_kwargs=mm_processor_kwargs,
limit_mm_per_prompt=limit_mm_per_prompt,
mm_processor_cache_gb=mm_processor_cache_gb,
hf_overrides=model_info.hf_overrides,
skip_tokenizer_init=model_info.require_embed_inputs,
enable_prompt_embeds=model_info.require_embed_inputs,
enable_mm_embeds=model_info.require_embed_inputs,
enforce_eager=model_info.enforce_eager,
**model_config_kwargs,
)
return InputProcessingContext.from_config(renderer_config)
return InputProcessingContext(
model_config,
tokenizer=cached_tokenizer_from_config(model_config),
)
def check_embeddings_close(
......
......@@ -6,7 +6,7 @@ import numpy as np
import pytest
import torch
from vllm.config import ModelConfig, ParallelConfig, RendererConfig, VllmConfig
from vllm.config import ModelConfig, ParallelConfig, VllmConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.cache import (
BaseMultiModalProcessorCache,
......@@ -110,14 +110,11 @@ def _create_vllm_config(
mm_processor_cache_gb: float,
enable_ipc: bool,
):
model_config = ModelConfig(
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_gb=mm_processor_cache_gb,
)
return VllmConfig(
model_config=model_config,
renderer_config=RendererConfig(model_config=model_config),
model_config=ModelConfig(
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_gb=mm_processor_cache_gb,
),
parallel_config=ParallelConfig(data_parallel_size=1 if enable_ipc else 2),
)
......@@ -509,15 +506,13 @@ def _run_test_cache_eviction_shm(
def test_cache_eviction_shm_cache():
model_config = ModelConfig(
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_type="shm",
mm_shm_cache_max_object_size_mb=6,
mm_processor_cache_gb=15.2 * MiB_bytes / GiB_bytes,
)
vllm_config = VllmConfig(
model_config=model_config,
renderer_config=RendererConfig(model_config=model_config),
model_config=ModelConfig(
model="llava-hf/llava-onevision-qwen2-0.5b-ov-hf",
mm_processor_cache_type="shm",
mm_shm_cache_max_object_size_mb=6,
mm_processor_cache_gb=15.2 * MiB_bytes / GiB_bytes,
),
)
sender_cache = ShmObjectStoreSenderCache(vllm_config)
receiver_cache = ShmObjectStoreReceiverCache(vllm_config, mp.Lock())
......
......@@ -7,7 +7,7 @@ from contextlib import nullcontext
import numpy as np
import pytest
from vllm.config import ModelConfig, RendererConfig
from vllm.config import ModelConfig
from vllm.multimodal import MULTIMODAL_REGISTRY
from vllm.multimodal.processing import (
InputProcessingContext,
......@@ -920,9 +920,8 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
model=model_id,
limit_mm_per_prompt=limit_mm_per_prompt,
)
renderer_config = RendererConfig(model_config=model_config)
processor = MULTIMODAL_REGISTRY.create_processor(renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(model_config)
processor._supported_mm_limits = {"image": num_supported}
profiler = MultiModalProfiler(processor)
......@@ -956,9 +955,8 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
model=model_id,
limit_mm_per_prompt=limit_mm_per_prompt,
)
renderer_config = RendererConfig(model_config=model_config)
processor = MULTIMODAL_REGISTRY.create_processor(renderer_config)
processor = MULTIMODAL_REGISTRY.create_processor(model_config)
rng = np.random.RandomState(0)
image = random_image(rng, min_wh=128, max_wh=256)
......@@ -1014,13 +1012,11 @@ def test_hf_processor_init_kwargs(
inference_kwargs,
expected_kwargs,
):
model_config = ModelConfig(model_id, mm_processor_kwargs=config_kwargs)
renderer_config = RendererConfig(
model_config=model_config,
tokenizer=model_id,
ctx = InputProcessingContext(
model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
tokenizer=None,
)
ctx = InputProcessingContext.from_config(renderer_config)
processor = ctx.get_hf_processor(
DummyProcessor, # type: ignore[arg-type]
**inference_kwargs,
......@@ -1049,13 +1045,11 @@ def test_hf_processor_call_kwargs(
inference_kwargs,
expected_kwargs,
):
model_config = ModelConfig(model_id, mm_processor_kwargs=config_kwargs)
renderer_config = RendererConfig(
model_config=model_config,
tokenizer=model_id,
ctx = InputProcessingContext(
model_config=ModelConfig(model_id, mm_processor_kwargs=config_kwargs),
tokenizer=None,
)
ctx = InputProcessingContext.from_config(renderer_config)
processor = ctx.get_hf_processor(DummyProcessor) # type: ignore[arg-type]
result = ctx.call_hf_processor(processor, {}, inference_kwargs)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment