Unverified Commit d3fb7d57 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

fix(sglang): lazy-import prometheus_client to fix TokenizerMetricsCollector metrics (#6269)


Signed-off-by: default avatarIshan Dhanani <ishandhanani@gmail.com>
parent bc514fbe
......@@ -9,11 +9,11 @@ from typing import TYPE_CHECKING, List, Optional, Tuple
import sglang as sgl
import zmq
import zmq.asyncio
from prometheus_client import CollectorRegistry
from sglang.srt.disaggregation.kv_events import ZmqEventPublisher
from sglang.srt.utils import get_local_ip_auto, get_zmq_socket, maybe_wrap_ipv6_address
if TYPE_CHECKING:
from prometheus_client import CollectorRegistry
from sglang.srt.managers.scheduler_metrics_mixin import KvMetrics
from dynamo.common.utils.prometheus import (
......@@ -28,10 +28,6 @@ from dynamo.llm import (
from dynamo.runtime import Component, Endpoint
from dynamo.sglang.args import Config
# Create a dedicated registry for dynamo_component metrics
# This ensures these metrics are isolated and can be exposed via their own callback
DYNAMO_COMPONENT_REGISTRY = CollectorRegistry()
def format_zmq_endpoint(endpoint_template: str, ip_address: str) -> str:
"""Format ZMQ endpoint by replacing wildcard with IP address.
......@@ -284,7 +280,7 @@ class DynamoSglangPublisher:
def setup_prometheus_registry(
engine: sgl.Engine, generate_endpoint: Endpoint, config: Config
) -> CollectorRegistry:
) -> "CollectorRegistry":
"""Set up Prometheus registry for SGLang metrics collection.
SGLang uses multiprocess architecture where metrics are stored in shared memory.
......@@ -355,14 +351,19 @@ async def setup_sgl_metrics(
# Always register the Dynamo component metrics callback (total_blocks,
# gpu_cache_usage, model_load_time). These use a dedicated registry that
# doesn't need MultiProcessCollector or PROMETHEUS_MULTIPROC_DIR.
# Import CollectorRegistry lazily to avoid importing prometheus_client
# before set_prometheus_multiproc_dir() has been called.
from prometheus_client import CollectorRegistry
dynamo_component_registry = CollectorRegistry()
register_engine_metrics_callback(
endpoint=generate_endpoint,
registry=DYNAMO_COMPONENT_REGISTRY,
registry=dynamo_component_registry,
)
# Create all Dynamo component gauges using the dedicated registry
component_gauges = LLMBackendMetrics(
registry=DYNAMO_COMPONENT_REGISTRY,
registry=dynamo_component_registry,
model_name=engine.server_args.served_model_name,
component_name=config.dynamo_args.component,
)
......
......@@ -3,6 +3,7 @@
This directory contains example Grafana dashboards for Dynamo observability. These are starter files that you can use as references for building your own custom dashboards.
- `dynamo.json` - General Dynamo dashboard showing software and hardware metrics
- `sglang.json` - SGLang engine metrics (request latency, throughput, cache) and HiCache KV cache metrics (GPU/CPU tier usage, eviction/load-back, PIN count)
- `dcgm-metrics.json` - GPU metrics dashboard using DCGM exporter data
- `kvbm.json` - KV Block Manager metrics dashboard
- `temp-loki.json` - Logging dashboard for Loki integration
......
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"description": "SGLang engine metrics and HiCache KV cache metrics",
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"panels": [
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 0
},
"id": 1,
"panels": [],
"title": "Request Latency",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 1
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "E2E Request Latency",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))",
"legendFormat": "p99",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))",
"legendFormat": "p90",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))",
"legendFormat": "p50",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "avg(rate(sglang:e2e_request_latency_seconds_sum[$__rate_interval]) / rate(sglang:e2e_request_latency_seconds_count[$__rate_interval]))",
"legendFormat": "avg",
"range": true,
"refId": "D"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 1
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Time-To-First-Token",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))",
"legendFormat": "p99",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))",
"legendFormat": "p90",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))",
"legendFormat": "p50",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "avg(rate(sglang:time_to_first_token_seconds_sum[$__rate_interval]) / rate(sglang:time_to_first_token_seconds_count[$__rate_interval]))",
"legendFormat": "avg",
"range": true,
"refId": "D"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 1
},
"id": 4,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Inter-Token Latency",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:inter_token_latency_seconds_bucket[$__rate_interval])))",
"legendFormat": "p99",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:inter_token_latency_seconds_bucket[$__rate_interval])))",
"legendFormat": "p90",
"range": true,
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:inter_token_latency_seconds_bucket[$__rate_interval])))",
"legendFormat": "p50",
"range": true,
"refId": "C"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "avg(rate(sglang:inter_token_latency_seconds_sum[$__rate_interval]) / rate(sglang:inter_token_latency_seconds_count[$__rate_interval]))",
"legendFormat": "avg",
"range": true,
"refId": "D"
}
]
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 9
},
"id": 5,
"panels": [],
"title": "Throughput & Queue",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 10
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Token Generation Throughput (tokens/s)",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sglang:gen_throughput",
"legendFormat": "tokens/s",
"range": true,
"refId": "A"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 10
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Running & Queued Requests",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sglang:num_running_reqs",
"legendFormat": "Running",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sglang:num_queue_reqs",
"legendFormat": "Queued",
"range": true,
"refId": "B"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 10
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Request Rate",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "rate(sglang:num_requests_total[$__rate_interval])",
"legendFormat": "Requests/s",
"range": true,
"refId": "A"
}
]
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 18
},
"id": 9,
"panels": [],
"title": "Cache & PIN",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percentunit"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 19
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Cache Hit Rate",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sglang:cache_hit_rate",
"legendFormat": "Hit Rate",
"range": true,
"refId": "A"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 19
},
"id": 11,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Active PIN Count",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sglang:hicache_active_pin_count",
"legendFormat": "PINs",
"range": true,
"refId": "A"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 19
},
"id": 12,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Retractions",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "rate(sglang:num_retracted_reqs_total[$__rate_interval])",
"legendFormat": "Retractions/s",
"range": true,
"refId": "A"
}
]
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 27
},
"id": 13,
"panels": [],
"title": "Memory Pressure",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent",
"min": 0,
"max": 100
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 28
},
"id": 14,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "GPU KV Cache Usage %",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sglang:token_usage * 100",
"legendFormat": "GPU KV Usage %",
"range": true,
"refId": "A"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "percent",
"min": 0,
"max": 100
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 28
},
"id": 15,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Host (CPU) KV Cache Usage %",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "sglang:hicache_host_used_tokens / sglang:hicache_host_total_tokens * 100",
"legendFormat": "Host KV Usage %",
"range": true,
"refId": "A"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 16,
"y": 28
},
"id": 16,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Eviction & Load-back Rate (tokens/s)",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "rate(sglang:hicache_eviction_num_tokens_total[$__rate_interval])",
"legendFormat": "Eviction",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "rate(sglang:hicache_load_back_num_tokens_total[$__rate_interval])",
"legendFormat": "Load-back",
"range": true,
"refId": "B"
}
]
},
{
"collapsed": false,
"gridPos": {
"h": 1,
"w": 24,
"x": 0,
"y": 36
},
"id": 17,
"panels": [],
"title": "HiCache Latency",
"type": "row"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 0,
"y": 37
},
"id": 18,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Eviction P99 Latency",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:hicache_eviction_duration_seconds_bucket[$__rate_interval])))",
"legendFormat": "p99",
"range": true,
"refId": "A"
}
]
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
},
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 8,
"x": 8,
"y": 37
},
"id": 19,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "single",
"sort": "none"
}
},
"title": "Load-back P99 Latency",
"type": "timeseries",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:hicache_load_back_duration_seconds_bucket[$__rate_interval])))",
"legendFormat": "p99",
"range": true,
"refId": "A"
}
]
}
],
"schemaVersion": 39,
"tags": [
"sglang",
"hicache"
],
"templating": {
"list": []
},
"time": {
"from": "now-1h",
"to": "now"
},
"timepicker": {},
"timezone": "",
"title": "SGLang Engine",
"uid": "sglang-engine",
"version": 1
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment