Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
a37415c3
Unverified
Commit
a37415c3
authored
Mar 14, 2024
by
Allen.Dou
Committed by
GitHub
Mar 14, 2024
Browse files
allow user to choose which vllm's metrics to display in grafana (#3393)
parent
81653d96
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
88 additions
and
96 deletions
+88
-96
examples/production_monitoring/grafana.json
examples/production_monitoring/grafana.json
+88
-96
No files found.
examples/production_monitoring/grafana.json
View file @
a37415c3
{
"__inputs"
:
[
{
"name"
:
"DS_PROMETHEUS"
,
"label"
:
"prometheus"
,
"description"
:
""
,
"type"
:
"datasource"
,
"pluginId"
:
"prometheus"
,
"pluginName"
:
"Prometheus"
}
],
"__elements"
:
{},
"__requires"
:
[
{
"type"
:
"grafana"
,
"id"
:
"grafana"
,
"name"
:
"Grafana"
,
"version"
:
"10.2.3"
},
{
"type"
:
"datasource"
,
"id"
:
"prometheus"
,
"name"
:
"Prometheus"
,
"version"
:
"1.0.0"
},
{
"type"
:
"panel"
,
"id"
:
"timeseries"
,
"name"
:
"Time series"
,
"version"
:
""
}
],
"annotations"
:
{
"list"
:
[
{
...
...
@@ -42,6 +11,12 @@
"hide"
:
true
,
"iconColor"
:
"rgba(0, 211, 255, 1)"
,
"name"
:
"Annotations & Alerts"
,
"target"
:
{
"limit"
:
100
,
"matchAny"
:
false
,
"tags"
:
[],
"type"
:
"dashboard"
},
"type"
:
"dashboard"
}
]
...
...
@@ -50,14 +25,14 @@
"editable"
:
true
,
"fiscalYearStartMonth"
:
0
,
"graphTooltip"
:
0
,
"id"
:
null
,
"id"
:
29
,
"links"
:
[],
"liveNow"
:
false
,
"panels"
:
[
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"description"
:
"End to end request latency measured in seconds."
,
"fieldConfig"
:
{
...
...
@@ -66,7 +41,6 @@
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
...
...
@@ -80,7 +54,6 @@
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
...
...
@@ -138,11 +111,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.99, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
false
,
"instant"
:
false
,
...
...
@@ -154,11 +127,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.95, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -171,11 +144,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.9, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -188,11 +161,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.5, sum by(le) (rate(vllm:e2e_request_latency_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -205,10 +178,10 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"editorMode"
:
"code"
,
"expr"
:
"rate(vllm:e2e_request_latency_seconds_sum[$__rate_interval])
\n
/
\n
rate(vllm:e2e_request_latency_seconds_count[$__rate_interval])"
,
"expr"
:
"rate(vllm:e2e_request_latency_seconds_sum
{model_name=
\"
$model_name
\"
}
[$__rate_interval])
\n
/
\n
rate(vllm:e2e_request_latency_seconds_count
{model_name=
\"
$model_name
\"
}
[$__rate_interval])"
,
"hide"
:
false
,
"instant"
:
false
,
"legendFormat"
:
"Average"
,
...
...
@@ -222,7 +195,7 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"description"
:
"Number of tokens processed per second"
,
"fieldConfig"
:
{
...
...
@@ -231,7 +204,6 @@
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
...
...
@@ -245,7 +217,6 @@
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
...
...
@@ -302,11 +273,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"rate(vllm:prompt_tokens_total[$__rate_interval])"
,
"expr"
:
"rate(vllm:prompt_tokens_total
{model_name=
\"
$model_name
\"
}
[$__rate_interval])"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
false
,
"instant"
:
false
,
...
...
@@ -318,11 +289,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"rate(vllm:generation_tokens_total[$__rate_interval])"
,
"expr"
:
"rate(vllm:generation_tokens_total
{model_name=
\"
$model_name
\"
}
[$__rate_interval])"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -339,7 +310,7 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"description"
:
"Inter token latency in seconds."
,
"fieldConfig"
:
{
...
...
@@ -348,7 +319,6 @@
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
...
...
@@ -362,7 +332,6 @@
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
...
...
@@ -420,11 +389,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.99, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
false
,
"instant"
:
false
,
...
...
@@ -436,11 +405,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.95, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -453,11 +422,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.9, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -470,11 +439,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.5, sum by(le) (rate(vllm:time_per_output_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -487,10 +456,10 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"editorMode"
:
"code"
,
"expr"
:
"rate(vllm:time_per_output_token_seconds_sum[$__rate_interval])
\n
/
\n
rate(vllm:time_per_output_token_seconds_count[$__rate_interval])"
,
"expr"
:
"rate(vllm:time_per_output_token_seconds_sum
{model_name=
\"
$model_name
\"
}
[$__rate_interval])
\n
/
\n
rate(vllm:time_per_output_token_seconds_count
{model_name=
\"
$model_name
\"
}
[$__rate_interval])"
,
"hide"
:
false
,
"instant"
:
false
,
"legendFormat"
:
"Mean"
,
...
...
@@ -504,7 +473,7 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"description"
:
"Number of requests in RUNNING, WAITING, and SWAPPED state"
,
"fieldConfig"
:
{
...
...
@@ -513,7 +482,6 @@
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
...
...
@@ -527,7 +495,6 @@
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
...
...
@@ -585,11 +552,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"vllm:num_requests_running"
,
"expr"
:
"vllm:num_requests_running
{model_name=
\"
$model_name
\"
}
"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
true
,
"instant"
:
false
,
...
...
@@ -601,11 +568,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"vllm:num_requests_swapped"
,
"expr"
:
"vllm:num_requests_swapped
{model_name=
\"
$model_name
\"
}
"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
true
,
...
...
@@ -618,11 +585,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"vllm:num_requests_waiting"
,
"expr"
:
"vllm:num_requests_waiting
{model_name=
\"
$model_name
\"
}
"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
true
,
...
...
@@ -639,7 +606,7 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"description"
:
"P50, P90, P95, and P99 TTFT latency in seconds."
,
"fieldConfig"
:
{
...
...
@@ -648,7 +615,6 @@
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
...
...
@@ -662,7 +628,6 @@
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
...
...
@@ -720,11 +685,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.99, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -737,11 +702,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.95, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"includeNullMetadata"
:
false
,
"instant"
:
false
,
...
...
@@ -753,11 +718,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.9, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -770,11 +735,11 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"disableTextWrap"
:
false
,
"editorMode"
:
"builder"
,
"expr"
:
"histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket[$__rate_interval])))"
,
"expr"
:
"histogram_quantile(0.5, sum by(le) (rate(vllm:time_to_first_token_seconds_bucket
{model_name=
\"
$model_name
\"
}
[$__rate_interval])))"
,
"fullMetaSearch"
:
false
,
"hide"
:
false
,
"includeNullMetadata"
:
false
,
...
...
@@ -787,10 +752,10 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"editorMode"
:
"code"
,
"expr"
:
"rate(vllm:time_to_first_token_seconds_sum[$__rate_interval])
\n
/
\n
rate(vllm:time_to_first_token_seconds_count[$__rate_interval])"
,
"expr"
:
"rate(vllm:time_to_first_token_seconds_sum
{model_name=
\"
$model_name
\"
}
[$__rate_interval])
\n
/
\n
rate(vllm:time_to_first_token_seconds_count
{model_name=
\"
$model_name
\"
}
[$__rate_interval])"
,
"hide"
:
false
,
"instant"
:
false
,
"legendFormat"
:
"Average"
,
...
...
@@ -804,7 +769,7 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"description"
:
"Percentage of used cache blocks by vLLM."
,
"fieldConfig"
:
{
...
...
@@ -813,7 +778,6 @@
"mode"
:
"palette-classic"
},
"custom"
:
{
"axisBorderShow"
:
false
,
"axisCenteredZero"
:
false
,
"axisColorMode"
:
"text"
,
"axisLabel"
:
""
,
...
...
@@ -827,7 +791,6 @@
"tooltip"
:
false
,
"viz"
:
false
},
"insertNulls"
:
false
,
"lineInterpolation"
:
"linear"
,
"lineWidth"
:
1
,
"pointSize"
:
5
,
...
...
@@ -885,10 +848,10 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"editorMode"
:
"code"
,
"expr"
:
"vllm:gpu_cache_usage_perc"
,
"expr"
:
"vllm:gpu_cache_usage_perc
{model_name=
\"
$model_name
\"
}
"
,
"instant"
:
false
,
"legendFormat"
:
"GPU Cache Usage"
,
"range"
:
true
,
...
...
@@ -897,10 +860,10 @@
{
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"
${DS_PROMETHEUS}
"
"uid"
:
"
prometheus
"
},
"editorMode"
:
"code"
,
"expr"
:
"vllm:cpu_cache_usage_perc"
,
"expr"
:
"vllm:cpu_cache_usage_perc
{model_name=
\"
$model_name
\"
}
"
,
"hide"
:
false
,
"instant"
:
false
,
"legendFormat"
:
"CPU Cache Usage"
,
...
...
@@ -913,10 +876,39 @@
}
],
"refresh"
:
""
,
"schemaVersion"
:
39
,
"schemaVersion"
:
37
,
"style"
:
"dark"
,
"tags"
:
[],
"templating"
:
{
"list"
:
[]
"list"
:
[
{
"current"
:
{
"selected"
:
false
,
"text"
:
"vllm"
,
"value"
:
"vllm"
},
"datasource"
:
{
"type"
:
"prometheus"
,
"uid"
:
"prometheus"
},
"definition"
:
"label_values(model_name)"
,
"hide"
:
0
,
"includeAll"
:
false
,
"label"
:
"model_name"
,
"multi"
:
false
,
"name"
:
"model_name"
,
"options"
:
[],
"query"
:
{
"query"
:
"label_values(model_name)"
,
"refId"
:
"StandardVariableQuery"
},
"refresh"
:
1
,
"regex"
:
""
,
"skipUrlSync"
:
false
,
"sort"
:
0
,
"type"
:
"query"
}
]
},
"time"
:
{
"from"
:
"now-5m"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment