Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e83b7e37
Unverified
Commit
e83b7e37
authored
Dec 07, 2025
by
Cyrus Leung
Committed by
GitHub
Dec 07, 2025
Browse files
Revert "[Renderer] Separate out `RendererConfig` from `ModelConfig` (#30145)" (#30199)
parent
27f4c2fd
Changes
105
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
102 additions
and
97 deletions
+102
-97
tests/models/multimodal/processing/test_h2ovl.py
tests/models/multimodal/processing/test_h2ovl.py
+1
-1
tests/models/multimodal/processing/test_idefics3.py
tests/models/multimodal/processing/test_idefics3.py
+1
-1
tests/models/multimodal/processing/test_internvl.py
tests/models/multimodal/processing/test_internvl.py
+1
-1
tests/models/multimodal/processing/test_llama4.py
tests/models/multimodal/processing/test_llama4.py
+1
-1
tests/models/multimodal/processing/test_llava_next.py
tests/models/multimodal/processing/test_llava_next.py
+3
-3
tests/models/multimodal/processing/test_llava_onevision.py
tests/models/multimodal/processing/test_llava_onevision.py
+3
-3
tests/models/multimodal/processing/test_minimax_vl_01.py
tests/models/multimodal/processing/test_minimax_vl_01.py
+2
-2
tests/models/multimodal/processing/test_mllama4.py
tests/models/multimodal/processing/test_mllama4.py
+1
-1
tests/models/multimodal/processing/test_nemotron_vl.py
tests/models/multimodal/processing/test_nemotron_vl.py
+1
-1
tests/models/multimodal/processing/test_phi3v.py
tests/models/multimodal/processing/test_phi3v.py
+1
-1
tests/models/multimodal/processing/test_phi4mm.py
tests/models/multimodal/processing/test_phi4mm.py
+1
-1
tests/models/multimodal/processing/test_qwen2_vl.py
tests/models/multimodal/processing/test_qwen2_vl.py
+1
-1
tests/models/multimodal/processing/test_smolvlm.py
tests/models/multimodal/processing/test_smolvlm.py
+1
-1
tests/models/multimodal/processing/test_tensor_schema.py
tests/models/multimodal/processing/test_tensor_schema.py
+16
-8
tests/models/multimodal/processing/test_transformers.py
tests/models/multimodal/processing/test_transformers.py
+2
-3
tests/models/multimodal/test_mapping.py
tests/models/multimodal/test_mapping.py
+30
-3
tests/models/registry.py
tests/models/registry.py
+1
-32
tests/models/utils.py
tests/models/utils.py
+15
-2
tests/multimodal/test_cache.py
tests/multimodal/test_cache.py
+11
-16
tests/multimodal/test_processing.py
tests/multimodal/test_processing.py
+9
-15
No files found.
tests/models/multimodal/processing/test_h2ovl.py
View file @
e83b7e37
...
@@ -162,7 +162,7 @@ def test_processor_override(
...
@@ -162,7 +162,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
min_num
=
min_dynamic_patch
if
dynamic_image_size
else
1
min_num
=
min_dynamic_patch
if
dynamic_image_size
else
1
...
...
tests/models/multimodal/processing/test_idefics3.py
View file @
e83b7e37
...
@@ -38,7 +38,7 @@ def test_processor_override(
...
@@ -38,7 +38,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
# Build the image str / prompt based on the number of images we pass
...
...
tests/models/multimodal/processing/test_internvl.py
View file @
e83b7e37
...
@@ -116,7 +116,7 @@ def test_processor_override(
...
@@ -116,7 +116,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
min_num
=
min_dynamic_patch
if
dynamic_image_size
else
1
min_num
=
min_dynamic_patch
if
dynamic_image_size
else
1
...
...
tests/models/multimodal/processing/test_llama4.py
View file @
e83b7e37
...
@@ -30,7 +30,7 @@ def test_processor_override(
...
@@ -30,7 +30,7 @@ def test_processor_override(
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
mm_processor_cache_gb
=
mm_processor_cache_gb
,
mm_processor_cache_gb
=
mm_processor_cache_gb
,
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
config
=
processor
.
info
.
get_hf_config
()
config
=
processor
.
info
.
get_hf_config
()
tokenizer
=
processor
.
info
.
get_tokenizer
()
tokenizer
=
processor
.
info
.
get_tokenizer
()
hf_processor
=
processor
.
info
.
get_hf_processor
()
hf_processor
=
processor
.
info
.
get_hf_processor
()
...
...
tests/models/multimodal/processing/test_llava_next.py
View file @
e83b7e37
...
@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
...
@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
"image"
:
1
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
info
=
processor
.
info
info
=
processor
.
info
seen_aspect_ratios
=
set
[
float
]()
seen_aspect_ratios
=
set
[
float
]()
...
@@ -140,7 +140,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
...
@@ -140,7 +140,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
image_ratios
=
[
image_ratios
=
[
(
171
,
152
),
(
171
,
152
),
...
@@ -173,7 +173,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
...
@@ -173,7 +173,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
seen_aspect_ratios
=
set
[
float
]()
seen_aspect_ratios
=
set
[
float
]()
image_sizes
=
list
[
ImageSize
]()
image_sizes
=
list
[
ImageSize
]()
...
...
tests/models/multimodal/processing/test_llava_onevision.py
View file @
e83b7e37
...
@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
...
@@ -42,7 +42,7 @@ def test_processor_max_tokens(model_id):
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
1
},
limit_mm_per_prompt
=
{
"image"
:
1
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
info
=
processor
.
info
info
=
processor
.
info
seen_aspect_ratios
=
set
[
float
]()
seen_aspect_ratios
=
set
[
float
]()
...
@@ -138,7 +138,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
...
@@ -138,7 +138,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
image_ratios
=
[
image_ratios
=
[
(
171
,
152
),
(
171
,
152
),
...
@@ -171,7 +171,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
...
@@ -171,7 +171,7 @@ def test_processor_prompt_replacements_all(model_id, num_imgs):
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
seen_aspect_ratios
=
set
[
float
]()
seen_aspect_ratios
=
set
[
float
]()
image_sizes
=
list
[
ImageSize
]()
image_sizes
=
list
[
ImageSize
]()
...
...
tests/models/multimodal/processing/test_minimax_vl_01.py
View file @
e83b7e37
...
@@ -24,7 +24,7 @@ def test_processor_override(
...
@@ -24,7 +24,7 @@ def test_processor_override(
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
prompt
=
"<image>"
*
num_imgs
prompt
=
"<image>"
*
num_imgs
image
=
Image
.
new
(
"RGB"
,
size
=
(
364
,
364
))
image
=
Image
.
new
(
"RGB"
,
size
=
(
364
,
364
))
mm_data
=
{
"image"
:
[
image
]
*
num_imgs
}
mm_data
=
{
"image"
:
[
image
]
*
num_imgs
}
...
@@ -83,7 +83,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
...
@@ -83,7 +83,7 @@ def test_processor_prompt_replacements_regression(model_id, num_imgs):
mm_processor_kwargs
=
None
,
mm_processor_kwargs
=
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
image_ratios
=
[
image_ratios
=
[
(
171
,
152
),
(
171
,
152
),
...
...
tests/models/multimodal/processing/test_mllama4.py
View file @
e83b7e37
...
@@ -25,7 +25,7 @@ def test_profiling(model_id: str, max_model_len: int):
...
@@ -25,7 +25,7 @@ def test_profiling(model_id: str, max_model_len: int):
limit_mm_per_prompt
=
mm_counts
,
limit_mm_per_prompt
=
mm_counts
,
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
profiler
=
MultiModalProfiler
(
processor
)
profiler
=
MultiModalProfiler
(
processor
)
decoder_dummy_data
=
profiler
.
get_decoder_dummy_data
(
decoder_dummy_data
=
profiler
.
get_decoder_dummy_data
(
...
...
tests/models/multimodal/processing/test_nemotron_vl.py
View file @
e83b7e37
...
@@ -118,7 +118,7 @@ def test_processor_override(
...
@@ -118,7 +118,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
limit_mm_per_prompt
=
{
"image"
:
len
(
size_factors
)},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
min_num
=
min_dynamic_patch
if
dynamic_image_size
else
1
min_num
=
min_dynamic_patch
if
dynamic_image_size
else
1
...
...
tests/models/multimodal/processing/test_phi3v.py
View file @
e83b7e37
...
@@ -39,7 +39,7 @@ def test_processor_override(
...
@@ -39,7 +39,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
# Build the image str / prompt based on the number of images we pass
...
...
tests/models/multimodal/processing/test_phi4mm.py
View file @
e83b7e37
...
@@ -39,7 +39,7 @@ def test_processor_override(
...
@@ -39,7 +39,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
# Build the image str / prompt based on the number of images we pass
...
...
tests/models/multimodal/processing/test_qwen2_vl.py
View file @
e83b7e37
...
@@ -34,7 +34,7 @@ def test_processor_override(
...
@@ -34,7 +34,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
tokenizer
=
processor
.
info
.
get_tokenizer
()
tokenizer
=
processor
.
info
.
get_tokenizer
()
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
...
...
tests/models/multimodal/processing/test_smolvlm.py
View file @
e83b7e37
...
@@ -38,7 +38,7 @@ def test_processor_override(
...
@@ -38,7 +38,7 @@ def test_processor_override(
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
mm_processor_kwargs
=
mm_processor_kwargs
if
kwargs_on_init
else
None
,
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
limit_mm_per_prompt
=
{
"image"
:
num_imgs
},
)
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
ctx
.
model
_config
)
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
hf_processor_mm_kwargs
=
{}
if
kwargs_on_init
else
mm_processor_kwargs
# Build the image str / prompt based on the number of images we pass
# Build the image str / prompt based on the number of images we pass
...
...
tests/models/multimodal/processing/test_tensor_schema.py
View file @
e83b7e37
...
@@ -11,7 +11,7 @@ import pytest
...
@@ -11,7 +11,7 @@ import pytest
import
torch.nn
as
nn
import
torch.nn
as
nn
from
PIL
import
Image
from
PIL
import
Image
from
vllm.config
import
ModelConfig
,
RendererConfig
,
VllmConfig
,
set_current_vllm_config
from
vllm.config
import
ModelConfig
,
VllmConfig
,
set_current_vllm_config
from
vllm.config.multimodal
import
(
from
vllm.config.multimodal
import
(
AudioDummyOptions
,
AudioDummyOptions
,
BaseDummyOptions
,
BaseDummyOptions
,
...
@@ -31,6 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
...
@@ -31,6 +31,7 @@ from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
,
InputProcessingContext
from
vllm.multimodal.processing
import
BaseMultiModalProcessor
,
InputProcessingContext
from
vllm.multimodal.utils
import
group_mm_kwargs_by_modality
from
vllm.multimodal.utils
import
group_mm_kwargs_by_modality
from
vllm.platforms
import
current_platform
from
vllm.platforms
import
current_platform
from
vllm.tokenizers
import
cached_tokenizer_from_config
from
vllm.utils.collection_utils
import
is_list_of
from
vllm.utils.collection_utils
import
is_list_of
from
vllm.utils.torch_utils
import
set_default_torch_dtype
from
vllm.utils.torch_utils
import
set_default_torch_dtype
...
@@ -149,10 +150,7 @@ def initialize_dummy_model(
...
@@ -149,10 +150,7 @@ def initialize_dummy_model(
backend
=
"nccl"
,
backend
=
"nccl"
,
)
)
initialize_model_parallel
(
tensor_model_parallel_size
=
1
)
initialize_model_parallel
(
tensor_model_parallel_size
=
1
)
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
model_config
)
model_config
=
model_config
,
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
)
with
set_current_vllm_config
(
vllm_config
=
vllm_config
):
with
set_current_vllm_config
(
vllm_config
=
vllm_config
):
with
set_default_torch_dtype
(
model_config
.
dtype
):
with
set_default_torch_dtype
(
model_config
.
dtype
):
model
=
model_cls
(
vllm_config
=
vllm_config
)
model
=
model_cls
(
vllm_config
=
vllm_config
)
...
@@ -184,12 +182,19 @@ def test_model_tensor_schema(model_id: str):
...
@@ -184,12 +182,19 @@ def test_model_tensor_schema(model_id: str):
else
:
else
:
dtype
=
model_info
.
dtype
dtype
=
model_info
.
dtype
renderer
_config
=
m
odel
_info
.
build_renderer_c
onfig
(
model
_config
=
M
odel
C
onfig
(
model_id
,
model_id
,
tokenizer
=
model_info
.
tokenizer
or
model_id
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
hf_overrides_fn
,
hf_overrides
=
hf_overrides_fn
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
dtype
,
dtype
=
dtype
,
)
)
model_config
=
renderer_config
.
model_config
model_cls
=
MULTIMODAL_REGISTRY
.
_get_model_cls
(
model_config
)
model_cls
=
MULTIMODAL_REGISTRY
.
_get_model_cls
(
model_config
)
assert
supports_multimodal
(
model_cls
)
assert
supports_multimodal
(
model_cls
)
...
@@ -207,7 +212,10 @@ def test_model_tensor_schema(model_id: str):
...
@@ -207,7 +212,10 @@ def test_model_tensor_schema(model_id: str):
if
not
any
(
inputs_parse_methods
):
if
not
any
(
inputs_parse_methods
):
pytest
.
skip
(
f
"
{
model_arch
}
does not support tensor schema validation."
)
pytest
.
skip
(
f
"
{
model_arch
}
does not support tensor schema validation."
)
ctx
=
InputProcessingContext
.
from_config
(
renderer_config
)
ctx
=
InputProcessingContext
(
model_config
,
tokenizer
=
cached_tokenizer_from_config
(
model_config
),
)
processing_info
=
factories
.
info
(
ctx
)
processing_info
=
factories
.
info
(
ctx
)
supported_mm_limits
=
processing_info
.
get_supported_mm_limits
()
supported_mm_limits
=
processing_info
.
get_supported_mm_limits
()
limit_mm_per_prompt
=
{
limit_mm_per_prompt
=
{
...
...
tests/models/multimodal/processing/test_transformers.py
View file @
e83b7e37
...
@@ -3,7 +3,7 @@
...
@@ -3,7 +3,7 @@
import
pytest
import
pytest
from
vllm.assets.image
import
ImageAsset
from
vllm.assets.image
import
ImageAsset
from
vllm.config
import
ModelConfig
,
RendererConfig
from
vllm.config
import
ModelConfig
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
...
@@ -13,9 +13,8 @@ def test_multimodal_processor(model_id):
...
@@ -13,9 +13,8 @@ def test_multimodal_processor(model_id):
model
=
model_id
,
model
=
model_id
,
model_impl
=
"transformers"
,
model_impl
=
"transformers"
,
)
)
renderer_config
=
RendererConfig
(
model_config
=
model_config
)
mm_processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
renderer
_config
)
mm_processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
model
_config
)
image_pil
=
ImageAsset
(
"cherry_blossom"
).
pil_image
image_pil
=
ImageAsset
(
"cherry_blossom"
).
pil_image
mm_data
=
{
"image"
:
image_pil
}
mm_data
=
{
"image"
:
image_pil
}
...
...
tests/models/multimodal/test_mapping.py
View file @
e83b7e37
...
@@ -7,6 +7,7 @@ import torch
...
@@ -7,6 +7,7 @@ import torch
import
transformers
import
transformers
from
transformers
import
AutoConfig
,
PreTrainedModel
from
transformers
import
AutoConfig
,
PreTrainedModel
from
vllm.config
import
ModelConfig
from
vllm.model_executor.models.utils
import
WeightsMapper
from
vllm.model_executor.models.utils
import
WeightsMapper
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.transformers_utils.config
import
try_get_safetensors_metadata
from
vllm.transformers_utils.config
import
try_get_safetensors_metadata
...
@@ -49,11 +50,37 @@ def test_hf_model_weights_mapper(model_arch: str):
...
@@ -49,11 +50,37 @@ def test_hf_model_weights_mapper(model_arch: str):
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_available_online
(
on_fail
=
"skip"
)
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
model_info
.
check_transformers_version
(
on_fail
=
"skip"
)
model_config
=
model_info
.
build_model_config
(
config_format
=
"hf"
)
is_mistral_model
=
model_arch
in
[
"Mistral3ForConditionalGeneration"
,
"PixtralForConditionalGeneration"
,
"VoxtralForConditionalGeneration"
,
]
if
not
is_mistral_model
or
model_info
.
tokenizer_mode
==
"mistral"
:
tokenizer_mode
=
model_info
.
tokenizer_mode
else
:
tokenizer_mode
=
"hf"
model_id
=
model_info
.
default
model_config
=
ModelConfig
(
model_id
,
tokenizer
=
model_info
.
tokenizer
or
model_id
,
tokenizer_mode
=
tokenizer_mode
,
config_format
=
"hf"
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
dtype
=
model_info
.
dtype
,
)
model_cls
=
MULTIMODAL_REGISTRY
.
_get_model_cls
(
model_config
)
model_cls
=
MULTIMODAL_REGISTRY
.
_get_model_cls
(
model_config
)
original_weights
=
create_repo_dummy_weights
(
model_
config
.
model
)
original_weights
=
create_repo_dummy_weights
(
model_
id
)
hf_dummy_model
=
create_dummy_model
(
model_
config
.
model
,
model_arch
)
hf_dummy_model
=
create_dummy_model
(
model_
id
,
model_arch
)
hf_converted_weights
=
hf_dummy_model
.
named_parameters
()
hf_converted_weights
=
hf_dummy_model
.
named_parameters
()
hf_converted_buffers
=
hf_dummy_model
.
named_buffers
()
hf_converted_buffers
=
hf_dummy_model
.
named_buffers
()
mapper
:
WeightsMapper
=
model_cls
.
hf_to_vllm_mapper
mapper
:
WeightsMapper
=
model_cls
.
hf_to_vllm_mapper
...
...
tests/models/registry.py
View file @
e83b7e37
...
@@ -9,8 +9,7 @@ import pytest
...
@@ -9,8 +9,7 @@ import pytest
from
packaging.version
import
Version
from
packaging.version
import
Version
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
from
transformers
import
__version__
as
TRANSFORMERS_VERSION
from
vllm.config.model
import
ModelConfig
,
ModelDType
from
vllm.config.model
import
ModelDType
,
TokenizerMode
from
vllm.config.renderer
import
RendererConfig
,
TokenizerMode
@
dataclass
(
frozen
=
True
)
@
dataclass
(
frozen
=
True
)
...
@@ -171,36 +170,6 @@ class _HfExamplesInfo:
...
@@ -171,36 +170,6 @@ class _HfExamplesInfo:
else
:
else
:
pytest
.
skip
(
msg
)
pytest
.
skip
(
msg
)
def
build_model_config
(
self
,
model
:
str
|
None
=
None
,
**
kwargs
)
->
ModelConfig
:
if
model
is
None
:
model
=
self
.
default
return
ModelConfig
(
**
{
"model"
:
model
,
"revision"
:
self
.
revision
,
"trust_remote_code"
:
self
.
trust_remote_code
,
"hf_overrides"
:
self
.
hf_overrides
,
"enable_prompt_embeds"
:
self
.
require_embed_inputs
,
"enable_mm_embeds"
:
self
.
require_embed_inputs
,
"enforce_eager"
:
self
.
enforce_eager
,
"dtype"
:
self
.
dtype
,
**
kwargs
,
}
)
def
build_renderer_config
(
self
,
model
:
str
|
None
=
None
,
**
kwargs
)
->
RendererConfig
:
model_config
=
self
.
build_model_config
(
model
,
**
kwargs
)
return
RendererConfig
(
model_config
=
model_config
,
tokenizer
=
self
.
tokenizer
or
model_config
.
model
,
tokenizer_mode
=
self
.
tokenizer_mode
,
skip_tokenizer_init
=
self
.
require_embed_inputs
,
)
_TEXT_GENERATION_EXAMPLE_MODELS
=
{
_TEXT_GENERATION_EXAMPLE_MODELS
=
{
# [Decoder-only]
# [Decoder-only]
...
...
tests/models/utils.py
View file @
e83b7e37
...
@@ -13,6 +13,7 @@ from transformers import PretrainedConfig
...
@@ -13,6 +13,7 @@ from transformers import PretrainedConfig
from
vllm.config.model
import
ModelConfig
,
ModelDType
,
RunnerOption
from
vllm.config.model
import
ModelConfig
,
ModelDType
,
RunnerOption
from
vllm.logprobs
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
from
vllm.logprobs
import
Logprob
,
PromptLogprobs
,
SampleLogprobs
from
vllm.multimodal.processing
import
InputProcessingContext
from
vllm.multimodal.processing
import
InputProcessingContext
from
vllm.tokenizers
import
cached_tokenizer_from_config
from
..
import
ci_envs
from
..
import
ci_envs
from
.registry
import
HF_EXAMPLE_MODELS
from
.registry
import
HF_EXAMPLE_MODELS
...
@@ -295,18 +296,30 @@ def build_model_context(
...
@@ -295,18 +296,30 @@ def build_model_context(
model_config_kwargs
=
model_config_kwargs
or
{}
model_config_kwargs
=
model_config_kwargs
or
{}
limit_mm_per_prompt
=
limit_mm_per_prompt
or
{}
limit_mm_per_prompt
=
limit_mm_per_prompt
or
{}
renderer
_config
=
m
odel
_info
.
build_renderer_c
onfig
(
model
_config
=
M
odel
C
onfig
(
model_id
,
model_id
,
runner
=
runner
,
runner
=
runner
,
tokenizer
=
model_info
.
tokenizer
or
model_id
,
tokenizer_mode
=
model_info
.
tokenizer_mode
,
revision
=
model_info
.
revision
,
trust_remote_code
=
model_info
.
trust_remote_code
,
dtype
=
dtype
,
dtype
=
dtype
,
seed
=
0
,
seed
=
0
,
mm_processor_kwargs
=
mm_processor_kwargs
,
mm_processor_kwargs
=
mm_processor_kwargs
,
limit_mm_per_prompt
=
limit_mm_per_prompt
,
limit_mm_per_prompt
=
limit_mm_per_prompt
,
mm_processor_cache_gb
=
mm_processor_cache_gb
,
mm_processor_cache_gb
=
mm_processor_cache_gb
,
hf_overrides
=
model_info
.
hf_overrides
,
skip_tokenizer_init
=
model_info
.
require_embed_inputs
,
enable_prompt_embeds
=
model_info
.
require_embed_inputs
,
enable_mm_embeds
=
model_info
.
require_embed_inputs
,
enforce_eager
=
model_info
.
enforce_eager
,
**
model_config_kwargs
,
**
model_config_kwargs
,
)
)
return
InputProcessingContext
.
from_config
(
renderer_config
)
return
InputProcessingContext
(
model_config
,
tokenizer
=
cached_tokenizer_from_config
(
model_config
),
)
def
check_embeddings_close
(
def
check_embeddings_close
(
...
...
tests/multimodal/test_cache.py
View file @
e83b7e37
...
@@ -6,7 +6,7 @@ import numpy as np
...
@@ -6,7 +6,7 @@ import numpy as np
import
pytest
import
pytest
import
torch
import
torch
from
vllm.config
import
ModelConfig
,
ParallelConfig
,
RendererConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
ParallelConfig
,
VllmConfig
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.cache
import
(
from
vllm.multimodal.cache
import
(
BaseMultiModalProcessorCache
,
BaseMultiModalProcessorCache
,
...
@@ -110,14 +110,11 @@ def _create_vllm_config(
...
@@ -110,14 +110,11 @@ def _create_vllm_config(
mm_processor_cache_gb
:
float
,
mm_processor_cache_gb
:
float
,
enable_ipc
:
bool
,
enable_ipc
:
bool
,
):
):
model_config
=
ModelConfig
(
model
=
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
mm_processor_cache_gb
=
mm_processor_cache_gb
,
)
return
VllmConfig
(
return
VllmConfig
(
model_config
=
model_config
,
model_config
=
ModelConfig
(
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
model
=
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
mm_processor_cache_gb
=
mm_processor_cache_gb
,
),
parallel_config
=
ParallelConfig
(
data_parallel_size
=
1
if
enable_ipc
else
2
),
parallel_config
=
ParallelConfig
(
data_parallel_size
=
1
if
enable_ipc
else
2
),
)
)
...
@@ -509,15 +506,13 @@ def _run_test_cache_eviction_shm(
...
@@ -509,15 +506,13 @@ def _run_test_cache_eviction_shm(
def
test_cache_eviction_shm_cache
():
def
test_cache_eviction_shm_cache
():
model_config
=
ModelConfig
(
model
=
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
mm_processor_cache_type
=
"shm"
,
mm_shm_cache_max_object_size_mb
=
6
,
mm_processor_cache_gb
=
15.2
*
MiB_bytes
/
GiB_bytes
,
)
vllm_config
=
VllmConfig
(
vllm_config
=
VllmConfig
(
model_config
=
model_config
,
model_config
=
ModelConfig
(
renderer_config
=
RendererConfig
(
model_config
=
model_config
),
model
=
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf"
,
mm_processor_cache_type
=
"shm"
,
mm_shm_cache_max_object_size_mb
=
6
,
mm_processor_cache_gb
=
15.2
*
MiB_bytes
/
GiB_bytes
,
),
)
)
sender_cache
=
ShmObjectStoreSenderCache
(
vllm_config
)
sender_cache
=
ShmObjectStoreSenderCache
(
vllm_config
)
receiver_cache
=
ShmObjectStoreReceiverCache
(
vllm_config
,
mp
.
Lock
())
receiver_cache
=
ShmObjectStoreReceiverCache
(
vllm_config
,
mp
.
Lock
())
...
...
tests/multimodal/test_processing.py
View file @
e83b7e37
...
@@ -7,7 +7,7 @@ from contextlib import nullcontext
...
@@ -7,7 +7,7 @@ from contextlib import nullcontext
import
numpy
as
np
import
numpy
as
np
import
pytest
import
pytest
from
vllm.config
import
ModelConfig
,
RendererConfig
from
vllm.config
import
ModelConfig
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal
import
MULTIMODAL_REGISTRY
from
vllm.multimodal.processing
import
(
from
vllm.multimodal.processing
import
(
InputProcessingContext
,
InputProcessingContext
,
...
@@ -920,9 +920,8 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
...
@@ -920,9 +920,8 @@ def test_limit_mm_per_prompt_dummy(model_id, limit, num_supported, is_valid):
model
=
model_id
,
model
=
model_id
,
limit_mm_per_prompt
=
limit_mm_per_prompt
,
limit_mm_per_prompt
=
limit_mm_per_prompt
,
)
)
renderer_config
=
RendererConfig
(
model_config
=
model_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
model
_config
)
processor
.
_supported_mm_limits
=
{
"image"
:
num_supported
}
processor
.
_supported_mm_limits
=
{
"image"
:
num_supported
}
profiler
=
MultiModalProfiler
(
processor
)
profiler
=
MultiModalProfiler
(
processor
)
...
@@ -956,9 +955,8 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
...
@@ -956,9 +955,8 @@ def test_limit_mm_per_prompt_apply(model_id, num_images, limit, is_valid):
model
=
model_id
,
model
=
model_id
,
limit_mm_per_prompt
=
limit_mm_per_prompt
,
limit_mm_per_prompt
=
limit_mm_per_prompt
,
)
)
renderer_config
=
RendererConfig
(
model_config
=
model_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
renderer
_config
)
processor
=
MULTIMODAL_REGISTRY
.
create_processor
(
model
_config
)
rng
=
np
.
random
.
RandomState
(
0
)
rng
=
np
.
random
.
RandomState
(
0
)
image
=
random_image
(
rng
,
min_wh
=
128
,
max_wh
=
256
)
image
=
random_image
(
rng
,
min_wh
=
128
,
max_wh
=
256
)
...
@@ -1014,13 +1012,11 @@ def test_hf_processor_init_kwargs(
...
@@ -1014,13 +1012,11 @@ def test_hf_processor_init_kwargs(
inference_kwargs
,
inference_kwargs
,
expected_kwargs
,
expected_kwargs
,
):
):
model_config
=
ModelConfig
(
model_id
,
mm_processor_kwargs
=
config_kwargs
)
ctx
=
InputProcessingContext
(
renderer_config
=
RendererConfig
(
model_config
=
ModelConfig
(
model_id
,
mm_processor_kwargs
=
config_kwargs
),
model_config
=
model_config
,
tokenizer
=
None
,
tokenizer
=
model_id
,
)
)
ctx
=
InputProcessingContext
.
from_config
(
renderer_config
)
processor
=
ctx
.
get_hf_processor
(
processor
=
ctx
.
get_hf_processor
(
DummyProcessor
,
# type: ignore[arg-type]
DummyProcessor
,
# type: ignore[arg-type]
**
inference_kwargs
,
**
inference_kwargs
,
...
@@ -1049,13 +1045,11 @@ def test_hf_processor_call_kwargs(
...
@@ -1049,13 +1045,11 @@ def test_hf_processor_call_kwargs(
inference_kwargs
,
inference_kwargs
,
expected_kwargs
,
expected_kwargs
,
):
):
model_config
=
ModelConfig
(
model_id
,
mm_processor_kwargs
=
config_kwargs
)
ctx
=
InputProcessingContext
(
renderer_config
=
RendererConfig
(
model_config
=
ModelConfig
(
model_id
,
mm_processor_kwargs
=
config_kwargs
),
model_config
=
model_config
,
tokenizer
=
None
,
tokenizer
=
model_id
,
)
)
ctx
=
InputProcessingContext
.
from_config
(
renderer_config
)
processor
=
ctx
.
get_hf_processor
(
DummyProcessor
)
# type: ignore[arg-type]
processor
=
ctx
.
get_hf_processor
(
DummyProcessor
)
# type: ignore[arg-type]
result
=
ctx
.
call_hf_processor
(
processor
,
{},
inference_kwargs
)
result
=
ctx
.
call_hf_processor
(
processor
,
{},
inference_kwargs
)
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment