Unverified commit e58c5a97, authored by Chenheli Hua, committed by GitHub
Browse files

[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)


Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
parent d46d417b
...@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = { ...@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_LORA_RESOLVER_CACHE_DIR": "VLLM_LORA_RESOLVER_CACHE_DIR":
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None), lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
# Enables torch profiler if set. Path to the directory where torch profiler # Enables torch profiler if set.
# traces are saved. Note that it must be an absolute path. # Both AsyncLLM's CPU traces as well as workers'
# traces (CPU & GPU) will be saved under this directory.
# Note that it must be an absolute path.
"VLLM_TORCH_PROFILER_DIR": "VLLM_TORCH_PROFILER_DIR":
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))), .path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
......
# SPDX-License-Identifier: Apache-2.0 # SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio import asyncio
import os
import socket
import time import time
from collections.abc import AsyncGenerator, Iterable, Mapping from collections.abc import AsyncGenerator, Iterable, Mapping
from copy import copy from copy import copy
from typing import Any, Optional, Union from typing import Any, Optional, Union
import numpy as np import numpy as np
import torch
import vllm.envs as envs import vllm.envs as envs
from vllm.config import ModelConfig, VllmConfig from vllm.config import ModelConfig, VllmConfig
...@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient): ...@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
except RuntimeError: except RuntimeError:
pass pass
if envs.VLLM_TORCH_PROFILER_DIR:
logger.info(
"Torch profiler enabled. AsyncLLM CPU traces will be collected under %s", # noqa: E501
envs.VLLM_TORCH_PROFILER_DIR)
worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
self.profiler = torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
],
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
on_trace_ready=torch.profiler.tensorboard_trace_handler(
envs.VLLM_TORCH_PROFILER_DIR,
worker_name=worker_name,
use_gzip=True))
else:
logger.info(
"Torch profiler disabled. AsyncLLM CPU traces will not be collected." # noqa: E501
)
self.profiler = None
@classmethod @classmethod
@deprecate_kwargs( @deprecate_kwargs(
"disable_log_requests", "disable_log_requests",
...@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient): ...@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
raise self.dead_error raise self.dead_error
async def start_profile(self) -> None: async def start_profile(self) -> None:
await self.engine_core.profile_async(True) coros = [self.engine_core.profile_async(True)]
if self.profiler is not None:
coros.append(asyncio.to_thread(self.profiler.start))
await asyncio.gather(*coros)
async def stop_profile(self) -> None: async def stop_profile(self) -> None:
await self.engine_core.profile_async(False) coros = [self.engine_core.profile_async(False)]
if self.profiler is not None:
coros.append(asyncio.to_thread(self.profiler.stop))
await asyncio.gather(*coros)
async def reset_mm_cache(self) -> None: async def reset_mm_cache(self) -> None:
self.processor.mm_registry.reset_processor_cache(self.model_config) self.processor.mm_registry.reset_processor_cache(self.model_config)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment