"tests/entrypoints/openai/completion/test_completion.py" did not exist on "04cf435d95fee3e4c0ba521583c1a64bc348c89d"
Unverified commit e58c5a97, authored by Chenheli Hua, committed by GitHub
Browse files

[Core] Add torch profiler CPU traces for AsyncLLM. (#21794)


Signed-off-by: Chenheli Hua <huachenheli@outlook.com>
parent d46d417b
......@@ -667,8 +667,10 @@ environment_variables: dict[str, Callable[[], Any]] = {
"VLLM_LORA_RESOLVER_CACHE_DIR":
lambda: os.getenv("VLLM_LORA_RESOLVER_CACHE_DIR", None),
# Enables torch profiler if set. Path to the directory where torch profiler
# traces are saved: both AsyncLLM's CPU traces and the workers' traces
# (CPU & GPU) will be saved under this directory.
# Note that it must be an absolute path.
"VLLM_TORCH_PROFILER_DIR":
lambda: (None if os.getenv("VLLM_TORCH_PROFILER_DIR", None) is None else os
.path.expanduser(os.getenv("VLLM_TORCH_PROFILER_DIR", "."))),
......
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import asyncio
import os
import socket
import time
from collections.abc import AsyncGenerator, Iterable, Mapping
from copy import copy
from typing import Any, Optional, Union
import numpy as np
import torch
import vllm.envs as envs
from vllm.config import ModelConfig, VllmConfig
......@@ -144,6 +147,26 @@ class AsyncLLM(EngineClient):
except RuntimeError:
pass
if envs.VLLM_TORCH_PROFILER_DIR:
logger.info(
"Torch profiler enabled. AsyncLLM CPU traces will be collected under %s", # noqa: E501
envs.VLLM_TORCH_PROFILER_DIR)
worker_name = f"{socket.gethostname()}_{os.getpid()}.async_llm"
self.profiler = torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
],
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
on_trace_ready=torch.profiler.tensorboard_trace_handler(
envs.VLLM_TORCH_PROFILER_DIR,
worker_name=worker_name,
use_gzip=True))
else:
logger.info(
"Torch profiler disabled. AsyncLLM CPU traces will not be collected." # noqa: E501
)
self.profiler = None
@classmethod
@deprecate_kwargs(
"disable_log_requests",
......@@ -562,10 +585,16 @@ class AsyncLLM(EngineClient):
raise self.dead_error
async def start_profile(self) -> None:
    """Start profiling on the engine core and, if enabled, locally.

    The engine core is asked to start profiling exactly once. When a
    local profiler was configured (``self.profiler`` is not ``None``),
    its ``start`` is launched alongside the engine-core request and both
    are awaited together.
    """
    # Issue the engine-core request a single time; awaiting it separately
    # before gathering would start the engine-core profiler twice.
    pending = [self.engine_core.profile_async(True)]
    if self.profiler is not None:
        # profiler.start is a synchronous call, so hand it to a worker
        # thread instead of running it on the event loop.
        pending.append(asyncio.to_thread(self.profiler.start))
    await asyncio.gather(*pending)
async def stop_profile(self) -> None:
    """Stop profiling on the engine core and, if enabled, locally.

    The engine core is asked to stop profiling exactly once. When a
    local profiler was configured (``self.profiler`` is not ``None``),
    its ``stop`` is launched alongside the engine-core request and both
    are awaited together.
    """
    # Issue the engine-core request a single time; awaiting it separately
    # before gathering would stop the engine-core profiler twice.
    pending = [self.engine_core.profile_async(False)]
    if self.profiler is not None:
        # profiler.stop is a synchronous call, so hand it to a worker
        # thread instead of running it on the event loop.
        pending.append(asyncio.to_thread(self.profiler.stop))
    await asyncio.gather(*pending)
async def reset_mm_cache(self) -> None:
self.processor.mm_registry.reset_processor_cache(self.model_config)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment