Commit d3fb7d57 (unverified), authored by ishandhanani, committed by GitHub
Browse files

fix(sglang): lazy-import prometheus_client to fix TokenizerMetricsCollector metrics (#6269)


Signed-off-by: Ishan Dhanani <ishandhanani@gmail.com>
parent bc514fbe
...@@ -9,11 +9,11 @@ from typing import TYPE_CHECKING, List, Optional, Tuple ...@@ -9,11 +9,11 @@ from typing import TYPE_CHECKING, List, Optional, Tuple
import sglang as sgl import sglang as sgl
import zmq import zmq
import zmq.asyncio import zmq.asyncio
from prometheus_client import CollectorRegistry
from sglang.srt.disaggregation.kv_events import ZmqEventPublisher from sglang.srt.disaggregation.kv_events import ZmqEventPublisher
from sglang.srt.utils import get_local_ip_auto, get_zmq_socket, maybe_wrap_ipv6_address from sglang.srt.utils import get_local_ip_auto, get_zmq_socket, maybe_wrap_ipv6_address
if TYPE_CHECKING: if TYPE_CHECKING:
from prometheus_client import CollectorRegistry
from sglang.srt.managers.scheduler_metrics_mixin import KvMetrics from sglang.srt.managers.scheduler_metrics_mixin import KvMetrics
from dynamo.common.utils.prometheus import ( from dynamo.common.utils.prometheus import (
...@@ -28,10 +28,6 @@ from dynamo.llm import ( ...@@ -28,10 +28,6 @@ from dynamo.llm import (
from dynamo.runtime import Component, Endpoint from dynamo.runtime import Component, Endpoint
from dynamo.sglang.args import Config from dynamo.sglang.args import Config
# Create a dedicated registry for dynamo_component metrics
# This ensures these metrics are isolated and can be exposed via their own callback
DYNAMO_COMPONENT_REGISTRY = CollectorRegistry()
def format_zmq_endpoint(endpoint_template: str, ip_address: str) -> str: def format_zmq_endpoint(endpoint_template: str, ip_address: str) -> str:
"""Format ZMQ endpoint by replacing wildcard with IP address. """Format ZMQ endpoint by replacing wildcard with IP address.
...@@ -284,7 +280,7 @@ class DynamoSglangPublisher: ...@@ -284,7 +280,7 @@ class DynamoSglangPublisher:
def setup_prometheus_registry( def setup_prometheus_registry(
engine: sgl.Engine, generate_endpoint: Endpoint, config: Config engine: sgl.Engine, generate_endpoint: Endpoint, config: Config
) -> CollectorRegistry: ) -> "CollectorRegistry":
"""Set up Prometheus registry for SGLang metrics collection. """Set up Prometheus registry for SGLang metrics collection.
SGLang uses multiprocess architecture where metrics are stored in shared memory. SGLang uses multiprocess architecture where metrics are stored in shared memory.
...@@ -355,14 +351,19 @@ async def setup_sgl_metrics( ...@@ -355,14 +351,19 @@ async def setup_sgl_metrics(
# Always register the Dynamo component metrics callback (total_blocks, # Always register the Dynamo component metrics callback (total_blocks,
# gpu_cache_usage, model_load_time). These use a dedicated registry that # gpu_cache_usage, model_load_time). These use a dedicated registry that
# doesn't need MultiProcessCollector or PROMETHEUS_MULTIPROC_DIR. # doesn't need MultiProcessCollector or PROMETHEUS_MULTIPROC_DIR.
# Import CollectorRegistry lazily to avoid importing prometheus_client
# before set_prometheus_multiproc_dir() has been called.
from prometheus_client import CollectorRegistry
dynamo_component_registry = CollectorRegistry()
register_engine_metrics_callback( register_engine_metrics_callback(
endpoint=generate_endpoint, endpoint=generate_endpoint,
registry=DYNAMO_COMPONENT_REGISTRY, registry=dynamo_component_registry,
) )
# Create all Dynamo component gauges using the dedicated registry # Create all Dynamo component gauges using the dedicated registry
component_gauges = LLMBackendMetrics( component_gauges = LLMBackendMetrics(
registry=DYNAMO_COMPONENT_REGISTRY, registry=dynamo_component_registry,
model_name=engine.server_args.served_model_name, model_name=engine.server_args.served_model_name,
component_name=config.dynamo_args.component, component_name=config.dynamo_args.component,
) )
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
This directory contains example Grafana dashboards for Dynamo observability. These are starter files that you can use as references for building your own custom dashboards. This directory contains example Grafana dashboards for Dynamo observability. These are starter files that you can use as references for building your own custom dashboards.
- `dynamo.json` - General Dynamo dashboard showing software and hardware metrics - `dynamo.json` - General Dynamo dashboard showing software and hardware metrics
- `sglang.json` - SGLang engine metrics (request latency, throughput, cache) and HiCache KV cache metrics (GPU/CPU tier usage, eviction/load-back, PIN count)
- `dcgm-metrics.json` - GPU metrics dashboard using DCGM exporter data - `dcgm-metrics.json` - GPU metrics dashboard using DCGM exporter data
- `kvbm.json` - KV Block Manager metrics dashboard - `kvbm.json` - KV Block Manager metrics dashboard
- `temp-loki.json` - Logging dashboard for Loki integration - `temp-loki.json` - Logging dashboard for Loki integration
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment