Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d1c2e15e
Unverified
Commit
d1c2e15e
authored
Dec 08, 2024
by
youkaichao
Committed by
GitHub
Dec 08, 2024
Browse files
[torch.compile] add dynamo time tracking (#11005)
Signed-off-by:
youkaichao
<
youkaichao@gmail.com
>
parent
af7c4a92
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
16 additions
and
5 deletions
+16
-5
vllm/compilation/backends.py
vllm/compilation/backends.py
+6
-0
vllm/compilation/decorators.py
vllm/compilation/decorators.py
+3
-3
vllm/compilation/monitor.py
vllm/compilation/monitor.py
+7
-2
No files found.
vllm/compilation/backends.py
View file @
d1c2e15e
...
@@ -265,7 +265,13 @@ class VllmBackend:
...
@@ -265,7 +265,13 @@ class VllmBackend:
def
__call__
(
self
,
graph
:
fx
.
GraphModule
,
example_inputs
)
->
Callable
:
def
__call__
(
self
,
graph
:
fx
.
GraphModule
,
example_inputs
)
->
Callable
:
# when dynamo calls the backend, it means the bytecode
# transform and analysis are done
compilation_counter
.
num_graphs_seen
+=
1
compilation_counter
.
num_graphs_seen
+=
1
from
.monitor
import
torch_compile_start_time
dynamo_time
=
time
.
time
()
-
torch_compile_start_time
logger
.
info
(
"Dynamo bytecode transform time: %.2f s"
,
dynamo_time
)
self
.
compilation_configs
.
compilation_time
+=
dynamo_time
# we control the compilation process, each instance can only be
# we control the compilation process, each instance can only be
# called once
# called once
...
...
vllm/compilation/decorators.py
View file @
d1c2e15e
...
@@ -145,6 +145,7 @@ def _support_torch_compile(
...
@@ -145,6 +145,7 @@ def _support_torch_compile(
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
''
,
**
kwargs
):
def
__init__
(
self
,
*
,
vllm_config
:
VllmConfig
,
prefix
:
str
=
''
,
**
kwargs
):
old_init
(
self
,
vllm_config
=
vllm_config
,
prefix
=
prefix
,
**
kwargs
)
old_init
(
self
,
vllm_config
=
vllm_config
,
prefix
=
prefix
,
**
kwargs
)
self
.
vllm_config
=
vllm_config
# for CompilationLevel.DYNAMO_AS_IS , the upper level model runner
# for CompilationLevel.DYNAMO_AS_IS , the upper level model runner
# will handle the compilation, so we don't need to do anything here.
# will handle the compilation, so we don't need to do anything here.
self
.
do_not_compile
=
\
self
.
do_not_compile
=
\
...
@@ -157,9 +158,6 @@ def _support_torch_compile(
...
@@ -157,9 +158,6 @@ def _support_torch_compile(
TorchCompileWrapperWithCustomDispatcher
.
__init__
(
TorchCompileWrapperWithCustomDispatcher
.
__init__
(
self
,
compilation_level
=
vllm_config
.
compilation_config
.
level
)
self
,
compilation_level
=
vllm_config
.
compilation_config
.
level
)
if
vllm_config
.
compilation_config
.
level
==
CompilationLevel
.
PIECEWISE
:
start_monitoring_torch_compile
(
vllm_config
.
compilation_config
)
cls
.
__init__
=
__init__
cls
.
__init__
=
__init__
def
__call__
(
self
,
*
args
,
**
kwargs
):
def
__call__
(
self
,
*
args
,
**
kwargs
):
...
@@ -186,6 +184,8 @@ def _support_torch_compile(
...
@@ -186,6 +184,8 @@ def _support_torch_compile(
raise
ValueError
(
raise
ValueError
(
"Unsupported dynamic dimensions"
"Unsupported dynamic dimensions"
f
"
{
dims
}
for argument
{
k
}
with type
{
type
(
arg
)
}
."
)
f
"
{
dims
}
for argument
{
k
}
with type
{
type
(
arg
)
}
."
)
# here, it is the starting point of the `torch.compile` process
start_monitoring_torch_compile
(
self
.
vllm_config
.
compilation_config
)
# if we don't use custom dispatcher, we can directly call the
# if we don't use custom dispatcher, we can directly call the
# compiled function and let torch.compile handle the dispatching,
# compiled function and let torch.compile handle the dispatching,
...
...
vllm/compilation/monitor.py
View file @
d1c2e15e
import
time
from
vllm.config
import
CompilationConfig
,
CompilationLevel
from
vllm.config
import
CompilationConfig
,
CompilationLevel
from
vllm.logger
import
init_logger
from
vllm.logger
import
init_logger
logger
=
init_logger
(
__name__
)
logger
=
init_logger
(
__name__
)
torch_compile_start_time
:
float
=
0.0
def
start_monitoring_torch_compile
(
compilation_config
:
CompilationConfig
):
def
start_monitoring_torch_compile
(
compilation_config
:
CompilationConfig
):
pass
global
torch_compile_start_time
torch_compile_start_time
=
time
.
time
()
def
end_monitoring_torch_compile
(
compilation_config
:
CompilationConfig
):
def
end_monitoring_torch_compile
(
compilation_config
:
CompilationConfig
):
if
compilation_config
.
level
==
CompilationLevel
.
PIECEWISE
:
if
compilation_config
.
level
==
CompilationLevel
.
PIECEWISE
:
logger
.
info
(
"
graph
compil
ation
takes %.2f s in total"
,
logger
.
info
(
"
torch.
compil
e
takes %.2f s in total"
,
compilation_config
.
compilation_time
)
compilation_config
.
compilation_time
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment