{
  "annotations": {
    "list": [
      {
        "builtIn": 1,
        "datasource": { "type": "grafana", "uid": "-- Grafana --" },
        "enable": true,
        "hide": true,
        "iconColor": "rgba(0, 211, 255, 1)",
        "name": "Annotations & Alerts",
        "type": "dashboard"
      }
    ]
  },
  "description": "SGLang engine metrics and HiCache KV cache metrics",
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "panels": [
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 0 },
      "id": 1,
      "panels": [],
      "title": "Request Latency",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 1 },
      "id": 2,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "E2E Request Latency",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p99",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p90",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:e2e_request_latency_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p50",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sum(rate(sglang:e2e_request_latency_seconds_sum[$__rate_interval])) / sum(rate(sglang:e2e_request_latency_seconds_count[$__rate_interval]))",
          "legendFormat": "avg",
          "range": true,
          "refId": "D"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 1 },
      "id": 3,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Time-To-First-Token",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p99",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p90",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:time_to_first_token_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p50",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sum(rate(sglang:time_to_first_token_seconds_sum[$__rate_interval])) / sum(rate(sglang:time_to_first_token_seconds_count[$__rate_interval]))",
          "legendFormat": "avg",
          "range": true,
          "refId": "D"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 1 },
      "id": 4,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Inter-Token Latency",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:inter_token_latency_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p99",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.9, sum by (le) (rate(sglang:inter_token_latency_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p90",
          "range": true,
          "refId": "B"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.5, sum by (le) (rate(sglang:inter_token_latency_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p50",
          "range": true,
          "refId": "C"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sum(rate(sglang:inter_token_latency_seconds_sum[$__rate_interval])) / sum(rate(sglang:inter_token_latency_seconds_count[$__rate_interval]))",
          "legendFormat": "avg",
          "range": true,
          "refId": "D"
        }
      ]
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 9 },
      "id": 5,
      "panels": [],
      "title": "Throughput & Queue",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 10 },
      "id": 6,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Token Generation Throughput (tokens/s)",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sglang:gen_throughput",
          "legendFormat": "tokens/s",
          "range": true,
          "refId": "A"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 10 },
      "id": 7,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Running & Queued Requests",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sglang:num_running_reqs",
          "legendFormat": "Running",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sglang:num_queue_reqs",
          "legendFormat": "Queued",
          "range": true,
          "refId": "B"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 10 },
      "id": 8,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Request Rate",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "rate(sglang:num_requests_total[$__rate_interval])",
          "legendFormat": "Requests/s",
          "range": true,
          "refId": "A"
        }
      ]
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 18 },
      "id": 9,
      "panels": [],
      "title": "Cache",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "percentunit"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 19 },
      "id": 10,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Cache Hit Rate",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sglang:cache_hit_rate",
          "legendFormat": "Hit Rate",
          "range": true,
          "refId": "A"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 19 },
      "id": 12,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Retractions",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "rate(sglang:num_retracted_reqs_total[$__rate_interval])",
          "legendFormat": "Retractions/s",
          "range": true,
          "refId": "A"
        }
      ]
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 27 },
      "id": 13,
      "panels": [],
      "title": "Memory Pressure",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "percent",
          "min": 0,
          "max": 100
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 28 },
      "id": 14,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "GPU KV Cache Usage %",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sglang:token_usage * 100",
          "legendFormat": "GPU KV Usage %",
          "range": true,
          "refId": "A"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "percent",
          "min": 0,
          "max": 100
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 28 },
      "id": 15,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Host (CPU) KV Cache Usage %",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "sglang:hicache_host_used_tokens / sglang:hicache_host_total_tokens * 100",
          "legendFormat": "Host KV Usage %",
          "range": true,
          "refId": "A"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          }
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 16, "y": 28 },
      "id": 16,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Eviction & Load-back Rate (tokens/s)",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "rate(sglang:hicache_eviction_num_tokens_total[$__rate_interval])",
          "legendFormat": "Eviction",
          "range": true,
          "refId": "A"
        },
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "rate(sglang:hicache_load_back_num_tokens_total[$__rate_interval])",
          "legendFormat": "Load-back",
          "range": true,
          "refId": "B"
        }
      ]
    },
    {
      "collapsed": false,
      "gridPos": { "h": 1, "w": 24, "x": 0, "y": 36 },
      "id": 17,
      "panels": [],
      "title": "HiCache Latency",
      "type": "row"
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 0, "y": 37 },
      "id": 18,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Eviction P99 Latency",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:hicache_eviction_duration_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p99",
          "range": true,
          "refId": "A"
        }
      ]
    },
    {
      "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
      "fieldConfig": {
        "defaults": {
          "color": { "mode": "palette-classic" },
          "custom": {
            "axisBorderShow": false,
            "axisCenteredZero": false,
            "axisColorMode": "text",
            "axisLabel": "",
            "axisPlacement": "auto",
            "drawStyle": "line",
            "fillOpacity": 0,
            "gradientMode": "none",
            "lineInterpolation": "linear",
            "lineWidth": 1,
            "pointSize": 5,
            "showPoints": "auto",
            "spanNulls": false,
            "stacking": { "group": "A", "mode": "none" },
            "thresholdsStyle": { "mode": "off" }
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ]
          },
          "unit": "s"
        },
        "overrides": []
      },
      "gridPos": { "h": 8, "w": 8, "x": 8, "y": 37 },
      "id": 19,
      "options": {
        "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true },
        "tooltip": { "mode": "single", "sort": "none" }
      },
      "title": "Load-back P99 Latency",
      "type": "timeseries",
      "targets": [
        {
          "datasource": { "type": "prometheus", "uid": "P1809F7CD0C75ACF3" },
          "editorMode": "code",
          "expr": "histogram_quantile(0.99, sum by (le) (rate(sglang:hicache_load_back_duration_seconds_bucket[$__rate_interval])))",
          "legendFormat": "p99",
          "range": true,
          "refId": "A"
        }
      ]
    }
  ],
  "schemaVersion": 39,
  "tags": [ "sglang", "hicache" ],
  "templating": { "list": [] },
  "time": { "from": "now-1h", "to": "now" },
  "timepicker": {},
  "timezone": "",
  "title": "SGLang Engine",
  "uid": "sglang-engine",
  "version": 1
}