[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)

Signed-off-by: Chenheli Hua <huachenheli@outlook.com>

[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)
Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
e58c5a97 · Chenheli Hua · GitHub · d46d417b · e58c5a97 · e58c5a97
Unverified Commit e58c5a97 authored Aug 19, 2025 by Chenheli Hua Committed by GitHub Aug 20, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 35 additions and 4 deletions

vllm/envs.py vllm/envs.py +4 -2

vllm/v1/engine/async_llm.py vllm/v1/engine/async_llm.py +31 -2

No files found.
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
    "VLLM_LORA_RESOLVER_CACHE_DIR":
    lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
-    # Enables torch profiler if set. Path to the directory where torch profiler
+    # Enables torch profiler if set.
-    # traces are saved. Note that it must be an absolute path.
+    # Both AsyncLLM's CPU traces and the workers'
+    # traces (CPU & GPU) will be saved under this directory.
+    # Note that it must be an absolute path.
    "VLLM_TORCH_PROFILER_DIR":
    lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
             .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),

--- a/vllm/v1/engine/async_llm.py
+++ b/vllm/v1/engine/async_llm.py
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import asyncio
+import os
+import socket
 import time
 from collections.abc import AsyncGenerator, Iterable, Mapping
 from copy import copy
 from typing import Any, Optional, Union
 import numpy as np
+import torch
 import vllm.envs as envs
 from vllm.config import ModelConfig, VllmConfig
@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
        except RuntimeError:
            pass
+        if envs.VLLM_TORCH_PROFILER_DIR:
+            logger.info(
+                "Torch profiler enabled. AsyncLLM CPU traces will be collected under %s",  # noqa: E501
+                envs.VLLM_TORCH_PROFILER_DIR)
+            worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
+            self.profiler = torch.profiler.profile(
+                activities=[
+                    torch.profiler.ProfilerActivity.CPU,
+                ],
+                with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
+                on_trace_ready=torch.profiler.tensorboard_trace_handler(
+                    envs.VLLM_TORCH_PROFILER_DIR,
+                    worker_name=worker_name,
+                    use_gzip=True))
+        else:
+            logger.info(
+                "Torch profiler disabled. AsyncLLM CPU traces will not be collected."  # noqa: E501
+            )
+            self.profiler = None
    @classmethod
    @deprecate_kwargs(
        "disable_log_requests",
@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
            raise self.dead_error
    async def start_profile(self) -> None:
-        await self.engine_core.profile_async(True)
+        coros = [self.engine_core.profile_async(True)]
+        if self.profiler is not None:
+            coros.append(asyncio.to_thread(self.profiler.start))
+        await asyncio.gather(*coros)
    async def stop_profile(self) -> None:
-        await self.engine_core.profile_async(False)
+        coros = [self.engine_core.profile_async(False)]
+        if self.profiler is not None:
+            coros.append(asyncio.to_thread(self.profiler.stop))
+        await asyncio.gather(*coros)
    async def reset_mm_cache(self) -> None:
        self.processor.mm_registry.reset_processor_cache(self.model_config)