Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e58c5a97
Unverified
Commit
e58c5a97
authored
Aug 19, 2025
by
Chenheli Hua
Committed by
GitHub
Aug 20, 2025
Browse files
[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)
Signed-off-by:
Chenheli Hua
<
huachenheli@outlook.com
>
parent
d46d417b
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
35 additions
and
4 deletions
+35
-4
vllm/envs.py
vllm/envs.py
+4
-2
vllm/v1/engine/async_llm.py
vllm/v1/engine/async_llm.py
+31
-2
No files found.
vllm/envs.py
View file @
e58c5a97
...
@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
...
@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_LORA_RESOLVER_CACHE_DIR"
:
"VLLM_LORA_RESOLVER_CACHE_DIR"
:
lambda
:
os
.
getenv
(
"VLLM_LORA_RESOLVER_CACHE_DIR"
,
None
),
lambda
:
os
.
getenv
(
"VLLM_LORA_RESOLVER_CACHE_DIR"
,
None
),
# Enables torch profiler if set. Path to the directory where torch profiler
# Enables torch profiler if set.
# traces are saved. Note that it must be an absolute path.
# Both AsyncLLM's CPU traces as well as workers'
# traces (CPU & GPU) will be saved under this directory.
# Note that it must be an absolute path.
"VLLM_TORCH_PROFILER_DIR"
:
"VLLM_TORCH_PROFILER_DIR"
:
lambda
:
(
None
if
os
.
getenv
(
"VLLM_TORCH_PROFILER_DIR"
,
None
)
is
None
else
os
lambda
:
(
None
if
os
.
getenv
(
"VLLM_TORCH_PROFILER_DIR"
,
None
)
is
None
else
os
.
path
.
expanduser
(
os
.
getenv
(
"VLLM_TORCH_PROFILER_DIR"
,
"."
))),
.
path
.
expanduser
(
os
.
getenv
(
"VLLM_TORCH_PROFILER_DIR"
,
"."
))),
...
...
vllm/v1/engine/async_llm.py
View file @
e58c5a97
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import
asyncio
import
asyncio
import
os
import
socket
import
time
import
time
from
collections.abc
import
AsyncGenerator
,
Iterable
,
Mapping
from
collections.abc
import
AsyncGenerator
,
Iterable
,
Mapping
from
copy
import
copy
from
copy
import
copy
from
typing
import
Any
,
Optional
,
Union
from
typing
import
Any
,
Optional
,
Union
import
numpy
as
np
import
numpy
as
np
import
torch
import
vllm.envs
as
envs
import
vllm.envs
as
envs
from
vllm.config
import
ModelConfig
,
VllmConfig
from
vllm.config
import
ModelConfig
,
VllmConfig
...
@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
...
@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
except
RuntimeError
:
except
RuntimeError
:
pass
pass
if
envs
.
VLLM_TORCH_PROFILER_DIR
:
logger
.
info
(
"Torch profiler enabled. AsyncLLM CPU traces will be collected under %s"
,
# noqa: E501
envs
.
VLLM_TORCH_PROFILER_DIR
)
worker_name
=
f
"
{
socket
.
gethostname
()
}
_
{
os
.
getpid
()
}
.async_llm"
self
.
profiler
=
torch
.
profiler
.
profile
(
activities
=
[
torch
.
profiler
.
ProfilerActivity
.
CPU
,
],
with_stack
=
envs
.
VLLM_TORCH_PROFILER_WITH_STACK
,
on_trace_ready
=
torch
.
profiler
.
tensorboard_trace_handler
(
envs
.
VLLM_TORCH_PROFILER_DIR
,
worker_name
=
worker_name
,
use_gzip
=
True
))
else
:
logger
.
info
(
"Torch profiler disabled. AsyncLLM CPU traces will not be collected."
# noqa: E501
)
self
.
profiler
=
None
@
classmethod
@
classmethod
@
deprecate_kwargs
(
@
deprecate_kwargs
(
"disable_log_requests"
,
"disable_log_requests"
,
...
@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
...
@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
raise
self
.
dead_error
raise
self
.
dead_error
async
def
start_profile
(
self
)
->
None
:
async
def
start_profile
(
self
)
->
None
:
await
self
.
engine_core
.
profile_async
(
True
)
coros
=
[
self
.
engine_core
.
profile_async
(
True
)]
if
self
.
profiler
is
not
None
:
coros
.
append
(
asyncio
.
to_thread
(
self
.
profiler
.
start
))
await
asyncio
.
gather
(
*
coros
)
async
def
stop_profile
(
self
)
->
None
:
async
def
stop_profile
(
self
)
->
None
:
await
self
.
engine_core
.
profile_async
(
False
)
coros
=
[
self
.
engine_core
.
profile_async
(
False
)]
if
self
.
profiler
is
not
None
:
coros
.
append
(
asyncio
.
to_thread
(
self
.
profiler
.
stop
))
await
asyncio
.
gather
(
*
coros
)
async
def
reset_mm_cache
(
self
)
->
None
:
async
def
reset_mm_cache
(
self
)
->
None
:
self
.
processor
.
mm_registry
.
reset_processor_cache
(
self
.
model_config
)
self
.
processor
.
mm_registry
.
reset_processor_cache
(
self
.
model_config
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment