{ "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "copyright": [ "SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.", "SPDX-License-Identifier: Apache-2.0", "Licensed under the Apache License, Version 2.0 (the \"License\");", "you may not use this file except in compliance with the License.", "You may obtain a copy of the License at", "http://www.apache.org/licenses/LICENSE-2.0", "Unless required by applicable law or agreed to in writing, software", "distributed under the License is distributed on an \"AS IS\" BASIS,", "WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.", "See the License for the specific language governing permissions and", "limitations under the License." ], "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": 1, "links": [], "liveNow": false, "panels": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, "id": 1, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "none" } }, "title": "KV Cache Utilization by Worker", "type": "timeseries", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "100 * llm_kv_blocks_active{component=\"$component\", endpoint=\"$endpoint\"} / llm_kv_blocks_total{component=\"$component\", endpoint=\"$endpoint\"}", "legendFormat": "Worker {{worker_id}}", "range": true, "refId": "A" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, "id": 2, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "none" } }, "title": "Request Slot Utilization by Worker", "type": "timeseries", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "100 * llm_requests_active_slots{component=\"$component\", endpoint=\"$endpoint\"} / llm_requests_total_slots{component=\"$component\", endpoint=\"$endpoint\"}", "legendFormat": "Worker {{worker_id}}", "range": true, "refId": "A" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 50 }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 8, "w": 4, "x": 0, "y": 8 }, "id": 3, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true }, "pluginVersion": "10.0.0", "title": "Average KV Cache Utilization", "type": "gauge", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "100 * avg(llm_kv_blocks_active{component=\"$component\", endpoint=\"$endpoint\"}) / avg(llm_kv_blocks_total{component=\"$component\", endpoint=\"$endpoint\"})", "legendFormat": "__auto", "range": true, "refId": "A" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 50 }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 8, "w": 4, "x": 4, "y": 8 }, "id": 4, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true }, "pluginVersion": "10.0.0", "title": "Average Request Slot Utilization", "type": "gauge", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "100 * avg(llm_requests_active_slots{component=\"$component\", endpoint=\"$endpoint\"}) / avg(llm_requests_total_slots{component=\"$component\", endpoint=\"$endpoint\"})", "legendFormat": "__auto", "range": true, "refId": "A" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "thresholds" }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 50 }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 8, "w": 4, "x": 8, "y": 8 }, "id": 7, "options": { "orientation": "auto", "reduceOptions": { "calcs": [ "lastNotNull" ], "fields": "", "values": false }, "showThresholdLabels": false, "showThresholdMarkers": true }, "pluginVersion": "10.0.0", "title": "Cumulative KV Cache Hit Rate", "type": "gauge", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "100 * sum(llm_kv_hit_rate_overlap_blocks{component=\"$component\", endpoint=\"$endpoint\"}) / sum(llm_kv_hit_rate_isl_blocks{component=\"$component\", endpoint=\"$endpoint\"})", "legendFormat": "__auto", "range": true, "refId": "A" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, "id": 5, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "none" } }, "title": "Load Average & Standard Deviation", "type": "timeseries", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "llm_load_avg{component=\"$component\", endpoint=\"$endpoint\"}", "legendFormat": "Average", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "llm_load_std{component=\"$component\", endpoint=\"$endpoint\"}", "hide": false, "legendFormat": "StdDev", "range": true, "refId": "B" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, "id": 8, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "none" } }, "title": "KV Cache Hit Rate by Worker", "type": "timeseries", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "100 * llm_kv_hit_rate_overlap_blocks{component=\"$component\", endpoint=\"$endpoint\"} / llm_kv_hit_rate_isl_blocks{component=\"$component\", endpoint=\"$endpoint\"}", "legendFormat": "Worker {{worker_id}}", "range": true, "refId": "A" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, "id": 9, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "none" } }, "title": "Cumulative KV Cache Hit Rate", "type": "timeseries", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "100 * sum(llm_kv_hit_rate_overlap_blocks{component=\"$component\", endpoint=\"$endpoint\"}) / sum(llm_kv_hit_rate_isl_blocks{component=\"$component\", endpoint=\"$endpoint\"})", "legendFormat": "Overall Hit Rate", "range": true, "refId": "A" } ] }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 20, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "none" }, "overrides": [] }, "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 }, "id": 6, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "right", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "none" } }, "title": "Available Resources", "type": "timeseries", "targets": [ { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "sum(llm_kv_blocks_total{component=\"$component\", endpoint=\"$endpoint\"} - llm_kv_blocks_active{component=\"$component\", endpoint=\"$endpoint\"})", "legendFormat": "Available KV Blocks", "range": true, "refId": "A" }, { "datasource": { "type": "prometheus", "uid": "prometheus" }, "editorMode": "code", "expr": "sum(llm_requests_total_slots{component=\"$component\", endpoint=\"$endpoint\"} - llm_requests_active_slots{component=\"$component\", endpoint=\"$endpoint\"})", "hide": false, "legendFormat": "Available Request Slots", "range": true, "refId": "B" } ] } ], "refresh": "2s", "schemaVersion": 38, "style": "dark", "tags": [ "llm", "metrics" ], "templating": { "list": [ { "current": { "selected": false, "text": "component", "value": "vllm" }, "datasource": { "type": "prometheus", "uid": "prometheus" }, "definition": "label_values(llm_kv_blocks_active, component)", "hide": 0, "includeAll": false, "label": "Component", "multi": false, "name": "component", "options": [], "query": { "query": "label_values(llm_kv_blocks_active, component)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query" }, { "current": { "selected": false, "text": "endpoint", "value": "load_metrics" }, "datasource": { "type": "prometheus", "uid": "prometheus" }, "definition": "label_values(llm_kv_blocks_active{component=\"$component\"}, endpoint)", "hide": 0, "includeAll": false, "label": "Endpoint", "multi": false, "name": "endpoint", "options": [], "query": { "query": "label_values(llm_kv_blocks_active{component=\"$component\"}, endpoint)", "refId": "StandardVariableQuery" }, "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query" } ] }, "time": { "from": "now-5m", "to": "now" }, "timepicker": {}, "timezone": "", "title": "LLM Worker Metrics", "uid": "llm-worker-metrics", "version": 1, "weekStart": "" }