Unverified commit a06bf664, authored by Lianmin Zheng and committed by GitHub
Browse files

[Auto Sync] Update collector.py, startup_func_log_and_timer... (20250910) (#10242)


Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: cctry <shiyang@x.ai>
parent bf72b801
This diff is collapsed.
"""
Records startup latency breakdown by context using gauge metrics in seconds
"""
import logging
import time
from contextlib import contextmanager
from functools import wraps
from typing import Any, Callable, Dict, Generator, Optional
# Module-level logger used for every startup timing message in this file.
logger = logging.getLogger(__name__)
# Whether durations are published to Prometheus; set to True by enable_startup_timer().
enable_startup_metrics = False
# Gauge labelled by "context"; remains None until enable_startup_timer() creates it.
STARTUP_LATENCY_SECONDS = None
# Track maximum durations for each context
_max_durations: Dict[str, float] = {}
def enable_startup_timer():
    """Turn on Prometheus recording of startup latency metrics.

    The first call creates the ``sglang:startup_latency_breakdown_seconds_max``
    gauge (labelled by ``context``) and then sets the module flag that makes
    the timing helpers publish to it. Later calls reuse the existing gauge
    instead of constructing it a second time, so the function is safe to call
    more than once in the same process.
    """
    global enable_startup_metrics, STARTUP_LATENCY_SECONDS

    if STARTUP_LATENCY_SECONDS is None:
        # We need to import prometheus_client after setting the env variable
        # `PROMETHEUS_MULTIPROC_DIR`, so the import stays local to this call.
        from prometheus_client import Gauge

        STARTUP_LATENCY_SECONDS = Gauge(
            "sglang:startup_latency_breakdown_seconds_max",
            "Startup latency breakdown in seconds by context, only records the maximum duration if the context is called multiple times.",
            labelnames=["context"],
            multiprocess_mode="mostrecent",
        )

    # Flip the flag only once the gauge exists, so a failed construction never
    # leaves recording enabled with no gauge to write to.
    enable_startup_metrics = True
def set_startup_metric(context: str, value: float, should_log: bool = True):
    """Record an already-measured startup duration for ``context``.

    Args:
        context: Label identifying the startup phase.
        value: Duration in seconds.
        should_log: When true, emit an info log line for this measurement.

    The value is stored, and pushed to the gauge, only while metrics are
    enabled and only if it exceeds the largest value seen so far for the
    same context.
    """
    if should_log:
        logger.info(f"Setting startup metric: {context} took {value:.3f}s")

    if enable_startup_metrics:
        previous_best = _max_durations.get(context, 0.0)
        if previous_best < value:
            _max_durations[context] = value
            STARTUP_LATENCY_SECONDS.labels(context=context).set(value)
def reset_startup_timers():
    """Forget every recorded maximum duration (handy for tests or re-initialization)."""
    # The dict is emptied in place, so no rebinding of the module global is needed.
    _max_durations.clear()
def get_max_duration(context: str) -> Optional[float]:
    """Return the largest duration recorded for ``context``, or None if there is none."""
    try:
        return _max_durations[context]
    except KeyError:
        return None
@contextmanager
def startup_timer(name: str, log_only: bool = False) -> Generator[None, None, None]:
"""
Context manager to measure startup latency for arbitrary code blocks.
Only records the maximum duration if the context is called multiple times.
Usage:
with startup_timer("model_loading"):
# model loading code
model = load_model()
with startup_timer("memory_allocation"):
# memory setup code
allocate_memory()
"""
start_time = time.monotonic()
try:
yield
finally:
duration_seconds = time.monotonic() - start_time
# Track the maximum duration for this context name
current_max = _max_durations.get(name, 0.0)
is_new_max = duration_seconds > current_max
if is_new_max:
_max_durations[name] = duration_seconds
# Only update Prometheus gauge if this is a new maximum
if enable_startup_metrics and not log_only:
STARTUP_LATENCY_SECONDS.labels(context=name).set(duration_seconds)
# Log with indication if this was a new max
logger.info(f"Startup timing: {name} took {duration_seconds:.3f}s")
def time_startup_latency(
    func: Optional[Callable[..., Any]] = None,
    name: Optional[str] = None,
    log_only: bool = False,
) -> Callable[..., Any]:
    """
    A decorator to measure startup context latency and record it in seconds.
    Only records the maximum duration if the context is called multiple times.

    Args:
        func: The function to decorate when used bare (``@time_startup_latency``);
            left as None when the decorator is called with arguments.
        name: Context label to record under. Defaults to the decorated
            function's ``__name__``.
        log_only: When true, the timing is logged and tracked in memory but
            never written to the Prometheus gauge.

    Returns:
        The wrapped function, or a decorator when ``func`` is None.

    Usage:
        @time_startup_latency
        def load_model():
            # model loading code

        @time_startup_latency(name="custom_init")
        def initialize_something():
            # initialization code

        @time_startup_latency(name="debug_only", log_only=True)
        def debug_function():
            # This will only log, not record to Prometheus
    """

    def measure(target: Callable[..., Any]) -> Callable[..., Any]:
        # Resolve the label into a local for each decorated function. Writing
        # it back to the enclosing ``name`` would pin the first function's
        # name onto every later function decorated with the same decorator.
        context_name = name or target.__name__

        @wraps(target)
        def wrapper(*args, **kwargs):
            start_time = time.monotonic()
            try:
                return target(*args, **kwargs)
            finally:
                # Runs even when the wrapped call raises, so failures are timed too.
                duration_seconds = time.monotonic() - start_time

                # Track the maximum duration for this context name
                current_max = _max_durations.get(context_name, 0.0)
                if duration_seconds > current_max:
                    _max_durations[context_name] = duration_seconds

                    # Only update Prometheus gauge if this is a new maximum
                    if enable_startup_metrics and not log_only:
                        STARTUP_LATENCY_SECONDS.labels(context=context_name).set(
                            duration_seconds
                        )

                # Log the timing
                logger.info(
                    f"Startup timing: {context_name} took {duration_seconds:.3f}s"
                )

        return wrapper

    # Bare usage passes the function directly; parameterized usage returns the
    # decorator. Compare against None so a falsy callable is still decorated.
    if func is not None:
        return measure(func)
    return measure
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment