Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d2e841a1
Unverified
Commit
d2e841a1
authored
Jul 07, 2025
by
Kyle Yu
Committed by
GitHub
Jul 08, 2025
Browse files
[Misc] Improve logging for dynamic shape cache compilation (#20573)
Signed-off-by:
kyolebu
<
kyu@redhat.com
>
parent
14601f5f
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
36 additions
and
15 deletions
+36
-15
vllm/compilation/backends.py
vllm/compilation/backends.py
+36
-15
No files found.
vllm/compilation/backends.py
View file @
d2e841a1
...
...
@@ -120,10 +120,15 @@ class CompilerManager:
handle
=
self
.
cache
[(
runtime_shape
,
graph_index
,
self
.
compiler
.
name
)]
compiled_graph
=
self
.
compiler
.
load
(
handle
,
graph
,
example_inputs
,
graph_index
,
runtime_shape
)
logger
.
debug
(
"Directly load the %s-th graph for shape %s from %s via "
"handle %s"
,
graph_index
,
str
(
runtime_shape
),
self
.
compiler
.
name
,
handle
)
if
runtime_shape
is
None
:
logger
.
debug
(
"Directly load the %s-th graph for dynamic shape from %s via "
"handle %s"
,
graph_index
,
self
.
compiler
.
name
,
handle
)
else
:
logger
.
debug
(
"Directly load the %s-th graph for shape %s from %s via "
"handle %s"
,
graph_index
,
str
(
runtime_shape
),
self
.
compiler
.
name
,
handle
)
return
compiled_graph
def
compile
(
self
,
...
...
@@ -152,9 +157,15 @@ class CompilerManager:
# there can be multiple graphs due to piecewise compilation.
now
=
time
.
time
()
elapsed
=
now
-
compilation_start_time
logger
.
info
(
"Directly load the compiled graph(s) for shape %s "
"from the cache, took %.3f s"
,
str
(
runtime_shape
),
elapsed
)
if
runtime_shape
is
None
:
logger
.
info
(
"Directly load the compiled graph(s) for dynamic shape "
"from the cache, took %.3f s"
,
elapsed
)
else
:
logger
.
info
(
"Directly load the compiled graph(s) for shape %s "
"from the cache, took %.3f s"
,
str
(
runtime_shape
),
elapsed
)
return
compiled_graph
# no compiler cached the graph, or the cache is disabled,
...
...
@@ -178,11 +189,21 @@ class CompilerManager:
self
.
is_cache_updated
=
True
if
graph_index
==
0
:
# adds some info logging for the first graph
logger
.
info
(
"Cache the graph of shape %s for later use"
,
str
(
runtime_shape
))
logger
.
debug
(
"store the %s-th graph for shape %s from %s via handle %s"
,
graph_index
,
str
(
runtime_shape
),
self
.
compiler
.
name
,
handle
)
if
runtime_shape
is
None
:
logger
.
info
(
"Cache the graph for dynamic shape for later use"
)
else
:
logger
.
info
(
"Cache the graph of shape %s for later use"
,
str
(
runtime_shape
))
if
runtime_shape
is
None
:
logger
.
debug
(
"Store the %s-th graph for dynamic shape from %s via "
"handle %s"
,
graph_index
,
self
.
compiler
.
name
,
handle
)
else
:
logger
.
debug
(
"Store the %s-th graph for shape %s from %s via handle %s"
,
graph_index
,
str
(
runtime_shape
),
self
.
compiler
.
name
,
handle
)
# after compiling the last graph, record the end time
if
graph_index
==
num_graphs
-
1
:
...
...
@@ -190,7 +211,7 @@ class CompilerManager:
elapsed
=
now
-
compilation_start_time
compilation_config
.
compilation_time
+=
elapsed
if
runtime_shape
is
None
:
logger
.
info
(
"Compiling a graph for
general
shape takes %.2f s"
,
logger
.
info
(
"Compiling a graph for
dynamic
shape takes %.2f s"
,
elapsed
)
else
:
logger
.
info
(
"Compiling a graph for shape %s takes %.2f s"
,
...
...
@@ -308,7 +329,7 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter):
i
for
i
,
x
in
enumerate
(
args
)
if
isinstance
(
x
,
torch
.
SymInt
)
]
global
compilation_start_time
compiled_graph_for_
general
_shape
=
self
.
vllm_backend
.
\
compiled_graph_for_
dynamic
_shape
=
self
.
vllm_backend
.
\
compiler_manager
.
compile
(
submod
,
args
,
...
...
@@ -323,7 +344,7 @@ class PiecewiseCompileInterpreter(torch.fx.Interpreter):
self
.
module
.
__dict__
[
target
]
=
piecewise_backend
(
submod
,
self
.
vllm_config
,
self
.
graph_pool
,
index
,
len
(
self
.
compile_submod_names
),
sym_shape_indices
,
compiled_graph_for_
general
_shape
,
self
.
vllm_backend
)
compiled_graph_for_
dynamic
_shape
,
self
.
vllm_backend
)
compilation_counter
.
num_piecewise_capturable_graphs_seen
+=
1
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment