# SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import gc import json import time from collections import Counter from contextlib import suppress from typing import Any, Optional from vllm.envs import VLLM_GC_DEBUG from vllm.logger import init_logger logger = init_logger(__name__) class GCDebugConfig: """ Config for GC Debugger. - 0: disable GC debugger - 1: enable GC debugger with gc.collect elpased times - '{"top_objects":5}': enable GC debugger with top 5 collected objects """ def __init__(self, gc_debug_conf: Optional[str] = None) -> None: self.enabled: bool = False self.top_objects: int = -1 if not gc_debug_conf or gc_debug_conf == "0": pass elif gc_debug_conf == "1": self.enabled = True else: try: json_conf = json.loads(gc_debug_conf) self.enabled = True self.top_objects = json_conf.get("top_objects", -1) except Exception: self.enabled = False logger.error("Failed to parse VLLM_GC_DEBUG(%s)", VLLM_GC_DEBUG) logger.info("GC Debug Config. %s", str(self)) def __repr__(self) -> str: return f"enabled:{self.enabled},top_objects:{self.top_objects}" class GCDebugger: """ Debugger for GC which logs helpful information for GC understanding. To enable, you should call maybe_attach_gc_debug_callback in the process. """ def __init__(self, config: GCDebugConfig) -> None: self.config = config # Start time in micro second of this GC cycle self.start_time_ns: int = time.monotonic_ns() # If config.top_objects is positive, # compute top collected objects by object types self.gc_top_collected_objects: str = "" def handle(self, phase: str, info: dict[str, int]) -> None: """ Handles a GC event (e.g. GC start or GC finish) """ generation = info.get("generation") if generation is None: return if phase == "start": # Before GC started, record GC start time # and top collected objects self.start_time_ns = time.monotonic_ns() self.gc_top_collected_objects = _compute_top_gc_collected_objects( gc.get_objects(generation), self.config.top_objects) elif phase == "stop": # After GC finished, Record GC elapsed time and # optionally top collected objects elpased_ms = (time.monotonic_ns() - self.start_time_ns) / 1e6 logger.info( "GC took %.3fms to complete. " "Collected %s objects in GC generation %d.%s", elpased_ms, str(info.get("collected", "?")), generation, (f" Top collected objects: \n{self.gc_top_collected_objects}" if self.gc_top_collected_objects else ""), ) def maybe_attach_gc_debug_callback() -> None: """ Attached a callback for GC debug when VLLM_GC_DEBUG is enabled. """ config = GCDebugConfig(VLLM_GC_DEBUG) if config.enabled: debugger: GCDebugger = GCDebugger(config) def gc_callback(phase: str, info: dict[str, int]) -> None: debugger.handle(phase, info) gc.callbacks.append(gc_callback) def _compute_detailed_type(o: Any) -> str: """ Detailed object type. TODO(Jialin): Further enhance the detailed type with element types for easier debugging. We tried but occasionally it would run into signals which kills the engine. """ size_str: str = "" # Object doesn't support len() - this can happen with type objects # or other objects that don't implement __len__ properly with suppress(Exception): size_str = f"(size:{len(o)})" return f"{str(type(o))}{size_str}" def _compute_top_gc_collected_objects(objects: list[Any], top: int) -> str: """ Group collected objects by types. """ if top <= 0: return "" object_types = [_compute_detailed_type(o) for o in objects] return "\n".join( f"{count:>5}:{object_type}" for object_type, count in Counter(object_types).most_common(top))