Unverified commit e858bfe0, authored by Benjamin Chislett, committed by GitHub
Browse files

[Cleanup] Refactor profiling env vars into a CLI config (#29912)


Signed-off-by: Benjamin Chislett <bchislett@nvidia.com>
Signed-off-by: Benjamin Chislett <chislett.ben@gmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
parent d471b2af
...@@ -98,10 +98,10 @@ class TPUWorker: ...@@ -98,10 +98,10 @@ class TPUWorker:
# MP runtime is initialized. # MP runtime is initialized.
self.profiler = None self.profiler = None
self.profile_dir = None self.profile_dir = None
if envs.VLLM_TORCH_PROFILER_DIR and self.rank < 1: if vllm_config.profiler_config.profiler == "torch" and self.rank < 1:
# For TPU, we can only have 1 active profiler session for 1 profiler # For TPU, we can only have 1 active profiler session for 1 profiler
# server. So we only profile on rank0. # server. So we only profile on rank0.
self.profile_dir = envs.VLLM_TORCH_PROFILER_DIR self.profile_dir = vllm_config.profiler_config.torch_profiler_dir
logger.info( logger.info(
"Profiling enabled. Traces will be saved to: %s", self.profile_dir "Profiling enabled. Traces will be saved to: %s", self.profile_dir
) )
......
...@@ -6,12 +6,12 @@ from typing import Any ...@@ -6,12 +6,12 @@ from typing import Any
import torch import torch
import torch.distributed import torch.distributed
import vllm.envs as envs
from vllm.config import VllmConfig from vllm.config import VllmConfig
from vllm.distributed import get_world_group from vllm.distributed import get_world_group
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.model_executor import set_random_seed from vllm.model_executor import set_random_seed
from vllm.platforms import current_platform from vllm.platforms import current_platform
from vllm.profiler.wrapper import TorchProfilerWrapper
from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment from vllm.v1.worker.gpu_worker import Worker, init_worker_distributed_environment
from vllm.v1.worker.xpu_model_runner import XPUModelRunner from vllm.v1.worker.xpu_model_runner import XPUModelRunner
...@@ -36,41 +36,17 @@ class XPUWorker(Worker): ...@@ -36,41 +36,17 @@ class XPUWorker(Worker):
assert device_config.device_type == "xpu" assert device_config.device_type == "xpu"
assert current_platform.is_xpu() assert current_platform.is_xpu()
# Torch profiler. Enabled and configured through env vars: # Torch profiler. Enabled and configured through profiler_config.
# VLLM_TORCH_PROFILER_DIR=/path/to/save/trace
self.profiler: Any | None = None self.profiler: Any | None = None
if envs.VLLM_TORCH_PROFILER_DIR: profiler_config = vllm_config.profiler_config
torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR if profiler_config.profiler == "torch":
worker_name = f"{vllm_config.instance_id}-rank-{self.rank}" worker_name = f"{vllm_config.instance_id}-rank-{self.rank}"
logger.info( self.profiler = TorchProfilerWrapper(
"Profiling enabled. Traces will be saved to: %s", profiler_config,
torch_profiler_trace_dir,
)
logger.debug(
"Profiler config: record_shapes=%s,"
"profile_memory=%s,with_stack=%s,with_flops=%s",
envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
envs.VLLM_TORCH_PROFILER_WITH_STACK,
envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
)
self.profiler = torch.profiler.profile(
activities=[
torch.profiler.ProfilerActivity.CPU,
torch.profiler.ProfilerActivity.XPU,
],
record_shapes=envs.VLLM_TORCH_PROFILER_RECORD_SHAPES,
profile_memory=envs.VLLM_TORCH_PROFILER_WITH_PROFILE_MEMORY,
with_stack=envs.VLLM_TORCH_PROFILER_WITH_STACK,
with_flops=envs.VLLM_TORCH_PROFILER_WITH_FLOPS,
on_trace_ready=torch.profiler.tensorboard_trace_handler(
torch_profiler_trace_dir,
worker_name=worker_name, worker_name=worker_name,
use_gzip=envs.VLLM_TORCH_PROFILER_USE_GZIP, local_rank=self.local_rank,
), activities=["CPU", "XPU"],
) )
else:
self.profiler = None
# we provide this function due to `torch.xpu.mem_get_info()` doesn't # we provide this function due to `torch.xpu.mem_get_info()` doesn't
# return correct free_gpu_memory on intel client GPU. We need to # return correct free_gpu_memory on intel client GPU. We need to
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment