Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
82551ad6
Unverified
Commit
82551ad6
authored
Mar 07, 2025
by
Cyrus Leung
Committed by
GitHub
Mar 06, 2025
Browse files
[Core] Don't use cache during multi-modal profiling (#14336)
parent
caac5c2e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
15 additions
and
5 deletions
+15
-5
vllm/inputs/registry.py
vllm/inputs/registry.py
+3
-1
vllm/multimodal/registry.py
vllm/multimodal/registry.py
+12
-4
No files found.
vllm/inputs/registry.py
View file @
82551ad6
...
...
@@ -331,7 +331,9 @@ class InputRegistry:
if
mm_registry
.
has_processor
(
model_config
):
tokenizer
=
cached_tokenizer_from_config
(
model_config
)
processor
=
mm_registry
.
create_processor
(
model_config
,
tokenizer
)
processor
=
mm_registry
.
create_processor
(
model_config
,
tokenizer
,
disable_cache
=
True
)
profiler
=
MultiModalProfiler
(
processor
)
dummy_data
=
profiler
.
get_dummy_data
(
seq_len
,
is_encoder_data
=
is_encoder_data
)
...
...
vllm/multimodal/registry.py
View file @
82551ad6
...
...
@@ -257,7 +257,9 @@ class MultiModalRegistry:
"""
if
self
.
has_processor
(
model_config
):
tokenizer
=
cached_tokenizer_from_config
(
model_config
)
processor
=
self
.
create_processor
(
model_config
,
tokenizer
)
processor
=
self
.
create_processor
(
model_config
,
tokenizer
,
disable_cache
=
True
)
seq_len
=
model_config
.
max_model_len
mm_limits
=
self
.
get_mm_limits_per_prompt
(
model_config
)
return
processor
.
info
.
get_mm_max_tokens_per_item
(
...
...
@@ -372,7 +374,9 @@ class MultiModalRegistry:
"""
if
self
.
has_processor
(
model_config
):
tokenizer
=
cached_tokenizer_from_config
(
model_config
)
processor
=
self
.
create_processor
(
model_config
,
tokenizer
)
processor
=
self
.
create_processor
(
model_config
,
tokenizer
,
disable_cache
=
True
)
profiler
=
MultiModalProfiler
(
processor
)
return
profiler
.
get_mm_limits
()
...
...
@@ -433,6 +437,8 @@ class MultiModalRegistry:
self
,
model_config
:
"ModelConfig"
,
tokenizer
:
AnyTokenizer
,
*
,
disable_cache
:
Optional
[
bool
]
=
None
,
)
->
BaseMultiModalProcessor
[
BaseProcessingInfo
]:
"""
Create a multi-modal processor for a specific model and tokenizer.
...
...
@@ -440,11 +446,13 @@ class MultiModalRegistry:
See also:
:ref:`mm-processing`
"""
if
disable_cache
is
None
:
disable_cache
=
model_config
.
disable_mm_preprocessor_cache
model_cls
=
self
.
_get_model_cls
(
model_config
)
factories
=
self
.
_processor_factories
[
model_cls
]
ctx
=
InputProcessingContext
(
model_config
,
tokenizer
)
cache
=
(
None
if
model_config
.
disable_mm_preprocessor_cache
else
self
.
_processing_cache
)
cache
=
None
if
disable_cache
else
self
.
_processing_cache
return
factories
.
build_processor
(
ctx
,
cache
=
cache
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment