{ "annotations": { "list": [ { "builtIn": 1, "datasource": { "type": "grafana", "uid": "-- Grafana --" }, "enable": true, "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", "type": "dashboard" } ] }, "description": "Dynamo dashboard with GPU utilization and NIXL transfer metrics for disaggregated serving", "editable": true, "fiscalYearStartMonth": 0, "graphTooltip": 0, "id": null, "links": [], "panels": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Per-second rate of dynamo_frontend_requests_total (30s window)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Requests", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "red", "value": 80 } ] } }, "overrides": [] }, "gridPos": { "x": 0, "y": 0, "w": 8, "h": 8 }, "id": 1, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((rate(dynamo_frontend_requests_total{namespace=\"$namespace\"}[30s]))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{request_type}}, {{status}}", "range": true, "refId": "A" } ], "title": "Frontend Requests / Sec", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Time to 
first token - includes prefill queue delay, GPU compute, and NIXL transfer time", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "milliseconds", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] } }, "overrides": [] }, "gridPos": { "x": 8, "y": 0, "w": 8, "h": 8 }, "id": 2, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((1000 * (rate(dynamo_frontend_time_to_first_token_seconds_sum{namespace=\"$namespace\"}[5m]) / rate(dynamo_frontend_time_to_first_token_seconds_count{namespace=\"$namespace\"}[5m])))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{model}}", "range": true, "refId": "A" } ], "title": "Frontend Avg Time to First Token", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "dynamo_frontend_request_duration (sum/count)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "milliseconds", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": 
false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] } }, "overrides": [] }, "gridPos": { "x": 16, "y": 0, "w": 8, "h": 8 }, "id": 5, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((1000 * (rate(dynamo_frontend_request_duration_seconds_sum{namespace=\"$namespace\"}[5m]) / rate(dynamo_frontend_request_duration_seconds_count{namespace=\"$namespace\"}[5m])))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{model}}", "range": true, "refId": "A" } ], "title": "Frontend Avg Request Duration", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "dynamo_frontend_inter_token_latency_seconds (sum/count)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "milliseconds", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] } }, "overrides": [] }, "gridPos": { "x": 0, "y": 8, "w": 8, "h": 8 }, "id": 3, 
"options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((1000 * (rate(dynamo_frontend_inter_token_latency_seconds_sum{namespace=\"$namespace\"}[5m]) / rate(dynamo_frontend_inter_token_latency_seconds_count{namespace=\"$namespace\"}[5m])))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{model}}", "range": true, "refId": "A" } ], "title": "Frontend Avg Inter-Token Latency", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Input/Output sequence length in tokens", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Tokens", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] } }, "overrides": [] }, "gridPos": { "x": 8, "y": 8, "w": 8, "h": 8 }, "id": 4, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "single", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((rate(dynamo_frontend_input_sequence_tokens_sum{namespace=\"$namespace\"}[5m]) / rate(dynamo_frontend_input_sequence_tokens_count{namespace=\"$namespace\"}[5m]))) * on(pod, namespace) group_left() 
kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "ISL", "range": true, "refId": "A" }, { "editorMode": "code", "expr": "((rate(dynamo_frontend_output_sequence_tokens_sum{namespace=\"$namespace\"}[5m]) / rate(dynamo_frontend_output_sequence_tokens_count{namespace=\"$namespace\"}[5m]))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "OSL", "range": true, "refId": "B" } ], "title": "Frontend Avg Input/Output Sequence Length", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Number of requests waiting in queue. High values indicate workers are saturated and cannot keep up with incoming load. This is THE key metric for diagnosing prefill worker bottlenecks.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Queued Requests", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 10 }, { "color": "red", "value": 50 } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "x": 16, "y": 8, "w": 8, "h": 8 }, "id": 35, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((dynamo_frontend_queued_requests{namespace=\"$namespace\"})) * on(pod, namespace) group_left() 
kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{model}}", "range": true, "refId": "A" } ], "title": "Frontend Queued Requests", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Prefill processing time including KV cache transfer over NIXL", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "ms", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "ms" }, "overrides": [] }, "gridPos": { "x": 0, "y": 16, "w": 8, "h": 8 }, "id": 20, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(1000 * rate(dynamo_component_request_duration_seconds_sum{namespace=\"$namespace\",dynamo_component=\"prefill\",dynamo_endpoint=\"generate\"}[5m]) / rate(dynamo_component_request_duration_seconds_count{namespace=\"$namespace\",dynamo_component=\"prefill\",dynamo_endpoint=\"generate\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "Avg - {{pod}}", "range": true, "refId": "A" }, { "editorMode": "code", "expr": "(1000 * histogram_quantile(0.99, 
rate(dynamo_component_request_duration_seconds_bucket{namespace=\"$namespace\",dynamo_component=\"prefill\",dynamo_endpoint=\"generate\"}[5m]))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "P99 - {{pod}}", "range": true, "refId": "B" } ], "title": "Prefill Worker Processing Time", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Prefill request throughput", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "req/s", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "reqps" }, "overrides": [] }, "gridPos": { "x": 8, "y": 16, "w": 8, "h": 8 }, "id": 21, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(rate(dynamo_component_requests_total{namespace=\"$namespace\",dynamo_component=\"prefill\",dynamo_endpoint=\"generate\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{pod}}", "range": true, "refId": "A" } ], "title": "Prefill Worker Throughput", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Component request latency comparison - Prefill (includes NIXL 
transfer) vs Decode", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "seconds", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "x": 16, "y": 16, "w": 8, "h": 8 }, "id": 22, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(rate(dynamo_component_request_duration_seconds_sum{namespace=\"$namespace\",dynamo_component=\"prefill\",dynamo_endpoint=\"generate\"}[5m]) / rate(dynamo_component_request_duration_seconds_count{namespace=\"$namespace\",dynamo_component=\"prefill\",dynamo_endpoint=\"generate\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "Prefill Avg - {{pod}}", "range": true, "refId": "A" }, { "editorMode": "code", "expr": "(rate(dynamo_component_request_duration_seconds_sum{namespace=\"$namespace\",dynamo_component=\"backend\",dynamo_endpoint=\"generate\"}[5m]) / rate(dynamo_component_request_duration_seconds_count{namespace=\"$namespace\",dynamo_component=\"backend\",dynamo_endpoint=\"generate\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "Decode Avg - {{pod}}", "range": true, "refId": "B" } ], "title": 
"Component Latency - Prefill vs Decode", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Decode worker request throughput", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "req/s", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "reqps" }, "overrides": [] }, "gridPos": { "x": 0, "y": 24, "w": 8, "h": 8 }, "id": 23, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((rate(dynamo_component_requests_total{namespace=\"$namespace\",dynamo_component=\"backend\"}[5m]))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{pod}}", "range": true, "refId": "A" } ], "title": "Decode Worker - Request Throughput", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Decode worker average request duration", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "seconds", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": 
{ "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "s" }, "overrides": [] }, "gridPos": { "x": 8, "y": 24, "w": 8, "h": 8 }, "id": 24, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "((rate(dynamo_component_request_duration_seconds_sum{namespace=\"$namespace\",dynamo_component=\"backend\"}[5m]) / rate(dynamo_component_request_duration_seconds_count{namespace=\"$namespace\",dynamo_component=\"backend\"}[5m]))) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{pod}}", "range": true, "refId": "A" } ], "title": "Decode Worker - Avg Request Duration", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "KV cache GPU memory utilization (0-100%) for decode workers. High values (>90%) indicate decode workers are at capacity. 
NOTE: Prefill workers do not expose this metric - monitor 'Prefill Worker Processing Time' to detect prefill worker saturation.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Utilization %", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "max": 1.0, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 0.8 }, { "color": "red", "value": 0.95 } ] }, "unit": "percentunit" }, "overrides": [] }, "gridPos": { "x": 16, "y": 24, "w": 8, "h": 8 }, "id": 34, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(dynamo_component_kvstats_gpu_cache_usage_percent{namespace=\"$namespace\"}) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{pod}}", "range": true, "refId": "A" } ], "title": "KV Cache Utilization", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Active KV cache blocks vs total available blocks for decode workers. Shows numeric capacity utilization. 
When active approaches total, workers are at capacity.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Blocks", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null } ] }, "unit": "short" }, "overrides": [ { "matcher": { "id": "byRegexp", "options": "/^Total - .*/" }, "properties": [ { "id": "custom.lineStyle", "value": { "dash": [ 10, 10 ], "fill": "dash" } } ] } ] }, "gridPos": { "x": 0, "y": 32, "w": 8, "h": 8 }, "id": 36, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(dynamo_component_kvstats_active_blocks{namespace=\"$namespace\"}) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "Active - {{pod}}", "range": true, "refId": "A" }, { "editorMode": "code", "expr": "(dynamo_component_kvstats_total_blocks{namespace=\"$namespace\"}) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "Total - {{pod}}", "range": true, "refId": "B" } ], "title": "KV Cache Blocks (Active/Total)", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "GPU compute utilization. 
Prefill workers show high utilization during prefill phase.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "% Utilization", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "x": 8, "y": 32, "w": 8, "h": 8 }, "id": 10, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "DCGM_FI_DEV_GPU_UTIL", "legendFormat": "GPU {{gpu}} - {{Hostname}}", "range": true, "refId": "A" } ], "title": "GPU Compute Utilization", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "GPU memory copy bandwidth utilization (%). Spikes indicate KV cache transfers over NIXL. On single-node deployments, NIXL uses CUDA IPC (GPU→Host→Host→GPU) not direct GPU-to-GPU. 
High sustained values (>80%) indicate transfer bottleneck.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "% Bandwidth", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 15, "gradientMode": "opacity", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "line+area" } }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 60 }, { "color": "red", "value": 80 } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "x": 0, "y": 40, "w": 8, "h": 8 }, "id": 11, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "DCGM_FI_DEV_MEM_COPY_UTIL", "legendFormat": "GPU {{gpu}} - {{Hostname}}", "range": true, "refId": "A" } ], "title": "GPU Memory Bandwidth", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "GPU memory usage. 
Prefill workers allocate KV blocks on decode workers via NIXL.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "GiB", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "gbytes" }, "overrides": [] }, "gridPos": { "x": 16, "y": 32, "w": 8, "h": 8 }, "id": 12, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "DCGM_FI_DEV_FB_USED / 1024", "legendFormat": "GPU {{gpu}} - {{Hostname}}", "range": true, "refId": "A" } ], "title": "GPU Memory Used", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "CPU usage by worker pods (cores). 
High values may indicate CPU bottleneck.", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "CPU Cores", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" }, { "color": "yellow", "value": 30 }, { "color": "red", "value": 50 } ] }, "unit": "short" }, "overrides": [] }, "gridPos": { "x": 16, "y": 40, "w": 8, "h": 8 }, "id": 25, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(rate(container_cpu_usage_seconds_total{namespace=\"$namespace\",pod=~\".*worker.*\",container=\"main\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{pod}}", "range": true, "refId": "A" } ], "title": "Worker CPU Usage", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Overall node CPU utilization percentage per instance (100% minus idle time, averaged across cores)", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "%", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": 
false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "max": 100, "min": 0, "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "percent" }, "overrides": [] }, "gridPos": { "x": 0, "y": 48, "w": 8, "h": 8 }, "id": 28, "options": { "legend": { "calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "100 - (avg by(instance) (rate(node_cpu_seconds_total{mode=\"idle\"}[5m])) * 100)", "legendFormat": "CPU Used - {{instance}}", "range": true, "refId": "A" } ], "title": "Node CPU Utilization", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Worker request throughput - requests per second by component", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "req/s", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "reqps" }, "overrides": [] }, "gridPos": { "x": 8, "y": 48, "w": 8, "h": 8 }, "id": 29, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { 
"hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(rate(dynamo_component_requests_total{namespace=\"$namespace\",dynamo_endpoint=\"generate\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "{{dynamo_component}} - {{pod}}", "range": true, "refId": "A" } ], "title": "Worker Request Throughput", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "description": "Worker data transfer - request/response bytes per second", "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "Bytes/s", "axisPlacement": "auto", "barAlignment": 0, "barWidthFactor": 0.6, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, "viz": false }, "insertNulls": false, "lineInterpolation": "smooth", "lineWidth": 2, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "auto", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green" } ] }, "unit": "Bps" }, "overrides": [] }, "gridPos": { "x": 16, "y": 48, "w": 8, "h": 8 }, "id": 30, "options": { "legend": { "calcs": [ "mean", "last" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "hideZeros": false, "mode": "multi", "sort": "none" } }, "targets": [ { "editorMode": "code", "expr": "(rate(dynamo_component_request_bytes_total{namespace=\"$namespace\",dynamo_endpoint=\"generate\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "IN - {{pod}}", "range": true, "refId": "A" }, { "editorMode": "code", "expr": 
"(rate(dynamo_component_response_bytes_total{namespace=\"$namespace\",dynamo_endpoint=\"generate\"}[5m])) * on(pod, namespace) group_left() kube_pod_status_phase{phase=\"Running\"}", "legendFormat": "OUT - {{pod}}", "range": true, "refId": "B" } ], "title": "Worker Data Transfer", "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "fieldConfig": { "defaults": { "color": { "mode": "palette-classic" }, "custom": { "axisBorderShow": false, "axisCenteredZero": false, "axisColorMode": "text", "axisLabel": "", "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", "fillOpacity": 10, "gradientMode": "none", "hideFrom": { "tooltip": false, "viz": false, "legend": false }, "insertNulls": false, "lineInterpolation": "linear", "lineWidth": 1, "pointSize": 5, "scaleDistribution": { "type": "linear" }, "showPoints": "never", "spanNulls": false, "stacking": { "group": "A", "mode": "none" }, "thresholdsStyle": { "mode": "off" } }, "mappings": [], "thresholds": { "mode": "absolute", "steps": [ { "color": "green", "value": null }, { "color": "yellow", "value": 20 }, { "color": "red", "value": 35 } ] }, "unit": "GBs", "min": 0, "max": 50, "decimals": 2 }, "overrides": [] }, "gridPos": { "x": 8, "y": 40, "w": 8, "h": 8 }, "id": 37, "options": { "legend": { "calcs": [ "mean", "max" ], "displayMode": "table", "placement": "bottom", "showLegend": true }, "tooltip": { "mode": "multi", "sort": "none" } }, "pluginVersion": "11.1.3", "targets": [ { "datasource": { "type": "prometheus", "uid": "${datasource}" }, "expr": "(rate(DCGM_FI_PROF_NVLINK_TX_BYTES[1m]) + rate(DCGM_FI_PROF_NVLINK_RX_BYTES[1m])) / 1e9", "legendFormat": "GPU {{gpu}} - {{Hostname}}", "refId": "A" } ], "title": "NVLink Bandwidth (GB/s)", "type": "timeseries", "description": "NVLink transfer bandwidth in GB/s (rate of change), measured from DCGM profiling metrics. Shows total bidirectional bandwidth (TX + RX) per GPU. 
This includes intra-pod tensor-parallel (TP) communication (TP=2 for prefill, TP=4 for decode). Low bandwidth (<1 GB/s) suggests that inter-pod NIXL KV cache transfers may be using host memory copies instead of direct NVLink/GPUDirect." } ], "refresh": "10s", "schemaVersion": 41, "tags": [ "dynamo", "disaggregated", "nixl", "gpu" ], "templating": { "list": [ { "current": { "text": "default", "value": "default" }, "label": "Data source", "name": "datasource", "options": [], "query": "prometheus", "refresh": 1, "regex": "", "type": "datasource" }, { "current": { "selected": false, "text": "robert", "value": "robert" }, "hide": 0, "label": "Namespace", "name": "namespace", "options": [], "query": "label_values(dynamo_frontend_requests_total, namespace)", "refresh": 1, "regex": "", "skipUrlSync": false, "sort": 0, "type": "query", "datasource": { "type": "prometheus", "uid": "${datasource}" } } ] }, "time": { "from": "now-30m", "to": "now" }, "timepicker": {}, "timezone": "browser", "title": "Dynamo Disaggregated Analysis", "uid": "dynamo-disagg-analysis", "version": 1 }