Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
458c1a4b
Unverified
Commit
458c1a4b
authored
Mar 14, 2026
by
Nick Hill
Committed by
GitHub
Mar 14, 2026
Browse files
[Frontend] Reduce chat template warmup logging levels (#37062)
Signed-off-by:
Nick Hill
<
nickhill123@gmail.com
>
parent
821fde2d
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
8 deletions
+7
-8
vllm/renderers/base.py
vllm/renderers/base.py
+7
-8
No files found.
vllm/renderers/base.py
View file @
458c1a4b
...
...
@@ -179,17 +179,17 @@ class BaseRenderer(ABC, Generic[_T]):
from
vllm.entrypoints.chat_utils
import
ChatTemplateResolutionError
try
:
logger
.
info
(
"Warming up chat template processing..."
)
logger
.
debug
(
"Warming up chat template processing..."
)
start_time
=
time
.
perf_counter
()
self
.
render_chat
([[{
"role"
:
"user"
,
"content"
:
"warmup"
}]],
chat_params
)
elapsed
=
time
.
perf_counter
()
-
start_time
logger
.
info
(
"Chat template warmup completed in %.3fs"
,
elapsed
)
logger
.
debug
(
"Chat template warmup completed in %.3fs"
,
elapsed
)
except
ChatTemplateResolutionError
:
logger
.
info
(
"This model does not support chat template."
)
logger
.
debug
(
"This model does not support chat template."
)
except
Exception
:
logger
.
exception
(
"Chat template warmup failed"
)
logger
.
warning
(
"Chat template warmup failed"
,
exc_info
=
True
)
if
self
.
mm_processor
:
from
vllm.multimodal.processing
import
TimingContext
...
...
@@ -200,7 +200,7 @@ class BaseRenderer(ABC, Generic[_T]):
mm_limits
=
processor
.
info
.
allowed_mm_limits
try
:
logger
.
info
(
"Warming up multi-modal processing..."
)
logger
.
debug
(
"Warming up multi-modal processing..."
)
start_time
=
time
.
perf_counter
()
processor_inputs
=
processor
.
dummy_inputs
.
get_dummy_processor_inputs
(
...
...
@@ -209,14 +209,13 @@ class BaseRenderer(ABC, Generic[_T]):
mm_options
=
mm_config
.
limit_per_prompt
,
)
_
=
processor
.
apply
(
processor_inputs
,
timing_ctx
=
TimingContext
(
enabled
=
False
),
processor_inputs
,
timing_ctx
=
TimingContext
(
enabled
=
False
)
)
elapsed
=
time
.
perf_counter
()
-
start_time
logger
.
info
(
"Multi-modal warmup completed in %.3fs"
,
elapsed
)
except
Exception
:
logger
.
exception
(
"Multi-modal warmup failed"
)
logger
.
warning
(
"Multi-modal warmup failed"
)
finally
:
self
.
clear_mm_cache
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment