Unverified Commit 615580d8 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

feat: Base metrics: add generic ingress handler metrics (#2090)


Co-authored-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent e2a514b2
......@@ -19,6 +19,8 @@ graph TD
PROMETHEUS -->|:9401/metrics| DCGM_EXPORTER[dcgm-exporter :9401]
PROMETHEUS -->|:7777/metrics| NATS_PROM_EXP
PROMETHEUS -->|:8080/metrics| DYNAMOFE[Dynamo HTTP FE :8080]
PROMETHEUS -->|:8081/metrics| DYNAMOBACKEND[Dynamo backend :8081]
DYNAMOFE --> DYNAMOBACKEND
GRAFANA -->|:9090/query API| PROMETHEUS
end
```
......@@ -34,12 +36,14 @@ As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build container
2. Start Dynamo dependencies. Assume you're at the root dynamo path:
```bash
docker compose -f deploy/docker-compose.yml up -d # Minimum components for Dynamo: etcd/nats/dcgm-exporter
# or
docker compose -f deploy/docker-compose.yml --profile metrics up -d # In addition to the above, start Prometheus & Grafana
# Start the basic services (etcd & natsd), along with Prometheus and Grafana
docker compose -f deploy/docker-compose.yml --profile metrics up -d
# Minimum components for Dynamo: etcd/nats/dcgm-exporter
docker compose -f deploy/docker-compose.yml up -d
```
To target specific GPU(s), export the variable below before running Docker Compose:
Optional: To target specific GPU(s), export the variable below before running Docker Compose:
```bash
export CUDA_VISIBLE_DEVICES=0,2
```
......@@ -63,9 +67,15 @@ As of Q2 2025, Dynamo HTTP Frontend metrics are exposed when you build container
### Prometheus
The Prometheus configuration is defined in [prometheus.yml](./prometheus.yml). It is configured to scrape metrics from the metrics aggregation service endpoint.
The Prometheus configuration is specified in [prometheus.yml](./prometheus.yml). This file is set up to collect metrics from the metrics aggregation service endpoint.
Please be aware that you might need to modify the target settings to align with your specific host configuration and network environment.
After making changes to prometheus.yml, it is necessary to reload the configuration using the command below. Simply sending a kill -HUP signal will not suffice due to the caching of the volume that contains the prometheus.yml file.
Note: You may need to adjust the target based on your host configuration and network setup.
```
docker compose -f deploy/docker-compose.yml up prometheus -d --force-recreate
```
### Grafana
......@@ -82,11 +92,13 @@ The following configuration files should be present in this directory:
- [grafana-datasources.yml](./grafana-datasources.yml): Contains Grafana datasource configuration
- [grafana_dashboards/grafana-dashboard-providers.yml](./grafana_dashboards/grafana-dashboard-providers.yml): Contains Grafana dashboard provider configuration
- [grafana_dashboards/grafana-dynamo-dashboard.json](./grafana_dashboards/grafana-dynamo-dashboard.json): A general Dynamo Dashboard for both SW and HW metrics.
- [grafana_dashboards/grafana-llm-metrics.json](./grafana_dashboards/grafana-llm-metrics.json): Contains Grafana dashboard configuration for LLM specific metrics.
- [grafana_dashboards/grafana-dcgm-metrics.json](./grafana_dashboards/grafana-dcgm-metrics.json): Contains Grafana dashboard configuration for DCGM GPU metrics
- [grafana_dashboards/grafana-llm-metrics.json](./grafana_dashboards/grafana-llm-metrics.json): This file, which is being phased out, contains the Grafana dashboard configuration for LLM-specific metrics. It requires an additional `metrics` component to operate concurrently. A new version is under development.
## Running the example `metrics` component
IMPORTANT: This section is being phased out, and some metrics may not function as expected. A new solution is under development.
When you run the example [components/metrics](../../components/metrics/README.md) component, it exposes a Prometheus /metrics endpoint with the following metrics (defined in [../../components/metrics/src/lib.rs](../../components/metrics/src/lib.rs)):
- `llm_requests_active_slots`: Number of currently active request slots per worker
- `llm_requests_total_slots`: Total available request slots per worker
......
......@@ -19,7 +19,7 @@
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": 1,
"id": 4,
"links": [],
"panels": [
{
......@@ -112,7 +112,7 @@
"refId": "A"
}
],
"title": "Requests / Sec",
"title": "Frontend Requests / Sec",
"type": "timeseries"
},
{
......@@ -205,7 +205,7 @@
"refId": "A"
}
],
"title": "Avg Time to First Token",
"title": "Frontend Avg Time to First Token",
"type": "timeseries"
},
{
......@@ -298,7 +298,7 @@
"refId": "A"
}
],
"title": "Avg Inter-Token Latency",
"title": "Frontend Avg Inter-Token Latency",
"type": "timeseries"
},
{
......@@ -391,7 +391,7 @@
"refId": "A"
}
],
"title": "Avg Request Duration",
"title": "Frontend Avg Request Duration",
"type": "timeseries"
},
{
......@@ -497,7 +497,7 @@
"refId": "B"
}
],
"title": "Avg Input/Output Sequence Length",
"title": "Frontend Avg Input/Output Sequence Length",
"type": "timeseries"
},
{
......@@ -611,17 +611,406 @@
],
"title": "DCGM GPU Utilization",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 0,
"y": 16
},
"id": 19,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"editorMode": "code",
"expr": "rate(dynamo_response_bytes_total{endpoint=\"generate\"}[1m])",
"legendFormat": "Response bytes",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "rate(dynamo_request_bytes_total{endpoint=\"generate\"}[1m])",
"hide": false,
"instant": false,
"legendFormat": "Request bytes",
"range": true,
"refId": "B"
}
],
"title": "dynamo.vllm bytes / sec",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 6,
"y": 16
},
"id": 18,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"editorMode": "code",
"expr": "rate(dynamo_requests_total{endpoint=\"generate\"}[1m])",
"legendFormat": "__auto",
"range": true,
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"editorMode": "code",
"expr": "",
"hide": false,
"instant": false,
"legendFormat": "__auto",
"range": true,
"refId": "B"
}
],
"title": "dynamo.vllm requests / sec",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 12,
"y": 16
},
"id": 20,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"editorMode": "code",
"expr": "dynamo_request_duration_seconds_sum / dynamo_request_duration_seconds_count",
"legendFormat": "__auto",
"range": true,
"refId": "A"
}
],
"title": "dynamo.vllm Avg Request Duration (seconds)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "P1809F7CD0C75ACF3"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "palette-classic"
},
"custom": {
"axisBorderShow": false,
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "",
"axisPlacement": "auto",
"barAlignment": 0,
"barWidthFactor": 0.6,
"drawStyle": "line",
"fillOpacity": 0,
"gradientMode": "none",
"hideFrom": {
"legend": false,
"tooltip": false,
"viz": false
},
"insertNulls": false,
"lineInterpolation": "linear",
"lineWidth": 1,
"pointSize": 5,
"scaleDistribution": {
"type": "linear"
},
"showPoints": "auto",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green"
},
{
"color": "red",
"value": 80
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 8,
"w": 6,
"x": 18,
"y": 16
},
"id": 21,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"hideZeros": false,
"mode": "single",
"sort": "none"
}
},
"pluginVersion": "12.0.1",
"targets": [
{
"editorMode": "code",
"expr": "rate(dynamo_errors_total{endpoint=\"generate\"}[1m])",
"legendFormat": "{{error_type}}",
"range": true,
"refId": "A"
}
],
"title": "dynamo.vllm Avg Errors / sec",
"type": "timeseries"
}
],
"preload": false,
"refresh": "",
"schemaVersion": 41,
"tags": [
"Dynamo",
"DCGM",
"etcd",
"NATS"
],
"tags": [],
"templating": {
"list": []
},
......@@ -632,6 +1021,6 @@
"timepicker": {},
"timezone": "browser",
"title": "Dynamo Dashboard",
"uid": "a7d3733f-f8e7-423a-ab4b-b18e3d7d0357",
"version": 5
"uid": "97ae8df9-138a-4f7a-9b0f-635b77d818fe",
"version": 1
}
\ No newline at end of file
......@@ -34,11 +34,18 @@ scrape_configs:
- targets: ['dcgm-exporter:9401'] # on the "monitoring" network
# This is a demo service that needs to be launched manually. See components/metrics/README.md
# Note that you may need to disable the firewall on your host. On Ubuntu: sudo ufw allow 8000/tcp
- job_name: 'dynamo-backend'
# Note that you may need to open this port in your host firewall. On Ubuntu: sudo ufw allow 8080/tcp
# You can also force the port, if the default is different: python -m dynamo.frontend --http-port 8080
- job_name: 'dynamo-frontend'
scrape_interval: 10s
static_configs:
- targets: ['host.docker.internal:8000'] # on the "monitoring" network
- targets: ['host.docker.internal:8080'] # on the "monitoring" network
# Launch via: DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 dynamo.<backend> ...
- job_name: 'dynamo-backend'
scrape_interval: 6s
static_configs:
- targets: ['host.docker.internal:8081']
# This is another demo aggregator that needs to be launched manually. See components/metrics/README.md
# Note that you may need to disable the firewall on your host. On Ubuntu: sudo ufw allow 9091/tcp
......
This diff is collapsed.
......@@ -995,8 +995,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"libc",
"wasi 0.11.0+wasi-snapshot-preview1",
"wasm-bindgen",
]
[[package]]
......@@ -1006,9 +1008,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73fea8450eea4bac3940448fb7ae50d91f034f941199fcd9d909a5a07aa455f0"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"libc",
"r-efi",
"wasi 0.14.2+wasi-0.2.4",
"wasm-bindgen",
]
[[package]]
......@@ -1134,6 +1138,23 @@ dependencies = [
"want",
]
[[package]]
name = "hyper-rustls"
version = "0.27.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
dependencies = [
"http",
"hyper",
"hyper-util",
"rustls",
"rustls-pki-types",
"tokio",
"tokio-rustls",
"tower-service",
"webpki-roots 1.0.2",
]
[[package]]
name = "hyper-timeout"
version = "0.5.2"
......@@ -1149,17 +1170,21 @@ dependencies = [
[[package]]
name = "hyper-util"
version = "0.1.11"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "497bbc33a26fdd4af9ed9c70d63f61cf56a938375fbb32df34db9b1cd6d643f2"
checksum = "8d9b05277c7e8da2c93a568989bb6207bef0112e8d17df7a6eda4a3cf143bc5e"
dependencies = [
"base64",
"bytes",
"futures-channel",
"futures-core",
"futures-util",
"http",
"http-body",
"hyper",
"ipnet",
"libc",
"percent-encoding",
"pin-project-lite",
"socket2",
"tokio",
......@@ -1371,6 +1396,22 @@ dependencies = [
"libc",
]
[[package]]
name = "ipnet"
version = "2.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
[[package]]
name = "iri-string"
version = "0.7.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2"
dependencies = [
"memchr",
"serde",
]
[[package]]
name = "itertools"
version = "0.14.0"
......@@ -1478,6 +1519,12 @@ version = "0.4.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
[[package]]
name = "lru-slab"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
[[package]]
name = "matchers"
version = "0.1.0"
......@@ -1993,6 +2040,61 @@ dependencies = [
"thiserror 1.0.69",
]
[[package]]
name = "quinn"
version = "0.11.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "626214629cda6781b6dc1d316ba307189c85ba657213ce642d9c77670f8202c8"
dependencies = [
"bytes",
"cfg_aliases",
"pin-project-lite",
"quinn-proto",
"quinn-udp",
"rustc-hash",
"rustls",
"socket2",
"thiserror 2.0.12",
"tokio",
"tracing",
"web-time",
]
[[package]]
name = "quinn-proto"
version = "0.11.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49df843a9161c85bb8aae55f101bc0bac8bcafd637a620d9122fd7e0b2f7422e"
dependencies = [
"bytes",
"getrandom 0.3.2",
"lru-slab",
"rand 0.9.1",
"ring",
"rustc-hash",
"rustls",
"rustls-pki-types",
"slab",
"thiserror 2.0.12",
"tinyvec",
"tracing",
"web-time",
]
[[package]]
name = "quinn-udp"
version = "0.5.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970"
dependencies = [
"cfg_aliases",
"libc",
"once_cell",
"socket2",
"tracing",
"windows-sys 0.59.0",
]
[[package]]
name = "quote"
version = "1.0.40"
......@@ -2140,6 +2242,47 @@ version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c"
[[package]]
name = "reqwest"
version = "0.12.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cbc931937e6ca3a06e3b6c0aa7841849b160a90351d6ab467a8b9b9959767531"
dependencies = [
"base64",
"bytes",
"futures-core",
"futures-util",
"http",
"http-body",
"http-body-util",
"hyper",
"hyper-rustls",
"hyper-util",
"js-sys",
"log",
"percent-encoding",
"pin-project-lite",
"quinn",
"rustls",
"rustls-pki-types",
"serde",
"serde_json",
"serde_urlencoded",
"sync_wrapper",
"tokio",
"tokio-rustls",
"tokio-util",
"tower 0.5.2",
"tower-http",
"tower-service",
"url",
"wasm-bindgen",
"wasm-bindgen-futures",
"wasm-streams",
"web-sys",
"webpki-roots 1.0.2",
]
[[package]]
name = "ring"
version = "0.17.14"
......@@ -2160,6 +2303,12 @@ version = "0.1.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
[[package]]
name = "rustc-hash"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d"
[[package]]
name = "rustc_version"
version = "0.4.1"
......@@ -2224,6 +2373,9 @@ name = "rustls-pki-types"
version = "1.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "917ce264624a4b4db1c364dcc35bfca9ded014d0a958cd47ad3e960e988ea51c"
dependencies = [
"web-time",
]
[[package]]
name = "rustls-webpki"
......@@ -2542,6 +2694,9 @@ name = "sync_wrapper"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
dependencies = [
"futures-core",
]
[[package]]
name = "synstructure"
......@@ -2574,9 +2729,12 @@ dependencies = [
"dynamo-runtime",
"futures",
"prometheus",
"rand 0.9.1",
"reqwest",
"serde",
"serde_json",
"tokio",
"tokio-test",
]
[[package]]
......@@ -2691,6 +2849,21 @@ dependencies = [
"zerovec",
]
[[package]]
name = "tinyvec"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71"
dependencies = [
"tinyvec_macros",
]
[[package]]
name = "tinyvec_macros"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "tokio"
version = "1.44.2"
......@@ -2741,6 +2914,19 @@ dependencies = [
"tokio",
]
[[package]]
name = "tokio-test"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2468baabc3311435b55dd935f702f42cd1b8abb7e754fb7dfb16bd36aa88f9f7"
dependencies = [
"async-stream",
"bytes",
"futures-core",
"tokio",
"tokio-stream",
]
[[package]]
name = "tokio-util"
version = "0.7.15"
......@@ -2772,7 +2958,7 @@ dependencies = [
"tokio",
"tokio-rustls",
"tokio-util",
"webpki-roots",
"webpki-roots 0.26.8",
]
[[package]]
......@@ -2898,6 +3084,24 @@ dependencies = [
"tracing",
]
[[package]]
name = "tower-http"
version = "0.6.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2"
dependencies = [
"bitflags 2.9.0",
"bytes",
"futures-util",
"http",
"http-body",
"iri-string",
"pin-project-lite",
"tower 0.5.2",
"tower-layer",
"tower-service",
]
[[package]]
name = "tower-layer"
version = "0.3.3"
......@@ -3174,6 +3378,19 @@ dependencies = [
"wasm-bindgen-shared",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.50"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61"
dependencies = [
"cfg-if 1.0.0",
"js-sys",
"once_cell",
"wasm-bindgen",
"web-sys",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.100"
......@@ -3206,6 +3423,39 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "wasm-streams"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65"
dependencies = [
"futures-util",
"js-sys",
"wasm-bindgen",
"wasm-bindgen-futures",
"web-sys",
]
[[package]]
name = "web-sys"
version = "0.3.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "web-time"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
dependencies = [
"js-sys",
"wasm-bindgen",
]
[[package]]
name = "webpki-roots"
version = "0.26.8"
......@@ -3215,6 +3465,15 @@ dependencies = [
"rustls-pki-types",
]
[[package]]
name = "webpki-roots"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e8983c3ab33d6fb807cfcdad2491c4ea8cbc8ed839181c7dfd9c67c83e261b2"
dependencies = [
"rustls-pki-types",
]
[[package]]
name = "winapi"
version = "0.2.8"
......
......@@ -33,4 +33,4 @@ repository = "https://github.com/ai-dynamo/dynamo.git"
[workspace.dependencies]
# local or crates.io
dynamo-runtime = { path = "../" }
prometheus = { workspace = true }
prometheus = { version = "0.14" }
......@@ -22,6 +22,10 @@ license.workspace = true
homepage.workspace = true
repository.workspace = true
[features]
default = []
integration = [] # Integration tests that require NATS
[dependencies]
dynamo-runtime = { workspace = true }
......@@ -31,3 +35,13 @@ serde = { version = "1", features = ["derive"] }
serde_json = { version = "1" }
tokio = { version = "1", features = ["full"] }
prometheus = { version = "0.14" }
[dev-dependencies]
rand = { version = "0.9.0" }
reqwest = { version = "0.12.22", default-features = false, features = ["json", "stream", "rustls-tls"] }
tokio-test = "0.4.4"
[[test]]
name = "integration_test"
path = "tests/integration_test.rs"
required-features = ["integration"]
# System Metrics Example
# Generic Profiling for Work Handlers
Demonstrates custom metrics and monitoring in Dynamo Runtime using Prometheus.
This example demonstrates how to add automatic Prometheus metrics profiling to any work handler without modifying the handler code itself.
## Overview
- Automatic hierarchical labeling: Runtime automatically adds `namespace`, `component`, and `endpoint` labels
- Uses existing Prometheus implementations
- HTTP metrics endpoint automatically added
The `WorkHandlerMetrics` system provides profiling capabilities that are applied automatically to all work handlers. It tracks:
## Quick Start
- **Request Count**: Total number of requests processed
- **Request Duration**: Time spent processing each request
- **Request/Response Bytes**: Total bytes received and sent
- **Error Count**: Total number of errors encountered
### Build
```bash
cd lib/runtime/examples/system_metrics
cargo build
Additionally, the example demonstrates how to add custom metrics with data bytes tracking in `MySystemStatsMetrics`.
## How It Works
**Automatic Metrics**: All work handlers automatically get profiling metrics without any code changes.
**Custom Metrics**: If you want to add custom metrics IN ADDITION to the automatic ones, you can use the `add_metrics` method:
```rust
use dynamo_runtime::pipeline::network::Ingress;
// Automatic profiling - no code changes needed!
let ingress = Ingress::for_engine(my_handler)?;
// Optional: Add custom metrics IN ADDITION to automatic ones
ingress.add_metrics(&endpoint)?;
```
### Run Server
```bash
export DYN_LOG=1 DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081
cargo run --bin system_server
The endpoint automatically provides proper labeling (namespace, component, endpoint) for all metrics.
## Available Methods
The `Ingress` struct provides methods for metrics:
- **Automatic**: All handlers get profiling metrics automatically
- `Ingress::add_metrics(&endpoint)` - Add custom metrics IN ADDITION to automatic ones (optional)
## Metrics Generated
### Automatic Metrics (No Code Changes Required)
The following Prometheus metrics are automatically created for all work handlers:
### Counters
- `requests_total` - Total requests processed
- `request_bytes_total` - Total bytes received in requests
- `response_bytes_total` - Total bytes sent in responses
- `errors_total` - Total errors encountered (with error_type labels)
### Error Types
The `errors_total` metric includes the following error types:
- `deserialization` - Errors parsing request messages
- `invalid_message` - Unexpected message format
- `response_stream` - Errors creating response streams
- `generate` - Errors in request processing
- `publish_response` - Errors publishing response data
- `publish_final` - Errors publishing final response
### Histograms
- `request_duration_seconds` - Request processing time
### Gauges
- `concurrent_requests` - Number of requests currently being processed
### Custom Metrics (Optional)
- `my_custom_bytes_processed_total` - Total data bytes processed by system handler (example)
### Labels
All metrics automatically include these labels from the endpoint:
- `namespace` - The namespace name
- `component` - The component name
- `endpoint` - The endpoint name
## Example Metrics Output
When the system is running, you'll see metrics from the /metrics HTTP path like this:
```prometheus
# HELP concurrent_requests Number of requests currently being processed by work handler
# TYPE concurrent_requests gauge
concurrent_requests{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace"} 0
# HELP my_custom_bytes_processed_total Example of a custom metric. Total number of data bytes processed by system handler
# TYPE my_custom_bytes_processed_total counter
my_custom_bytes_processed_total{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace"} 42
# HELP request_bytes_total Total number of bytes received in requests by work handler
# TYPE request_bytes_total counter
request_bytes_total{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace"} 1098
# HELP request_duration_seconds Time spent processing requests by work handler
# TYPE request_duration_seconds histogram
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="0.005"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="0.01"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="0.025"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="0.05"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="0.1"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="0.25"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="0.5"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="1"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="2.5"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="5"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="10"} 3
request_duration_seconds_bucket{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace",le="+Inf"} 3
request_duration_seconds_sum{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace"} 0.00048793700000000003
request_duration_seconds_count{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace"} 3
# HELP requests_total Total number of requests processed by work handler
# TYPE requests_total counter
requests_total{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace"} 3
# HELP response_bytes_total Total number of bytes sent in responses by work handler
# TYPE response_bytes_total counter
response_bytes_total{component="dyn_example_component",endpoint="dyn_example_endpoint9881",namespace="dyn_example_namespace"} 1917
# HELP uptime_seconds Total uptime of the DistributedRuntime in seconds
# TYPE uptime_seconds gauge
uptime_seconds{namespace="http_server"} 1.8226759879999999
```
### Run Client
```bash
cargo run --bin system_client
## Examples
### Example 1: Simple Handler with Automatic Profiling
```rust
struct SimpleHandler;
#[async_trait]
impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for SimpleHandler {
async fn generate(&self, input: SingleIn<String>) -> Result<ManyOut<Annotated<String>>> {
// Your business logic here
// No need to add any metrics code!
Ok(ResponseStream::new(Box::pin(stream), ctx.context()))
}
}
// Automatic profiling - no additional code needed!
let ingress = Ingress::for_engine(SimpleHandler::new())?;
```
Note: Running the client will increment `service_requests_total`.
### Example 2: Custom Handler with Data Bytes Tracking
### View Metrics
```bash
curl http://localhost:8081/metrics
```rust
struct RequestHandler {
metrics: Option<Arc<MySystemStatsMetrics>>,
}
#[async_trait]
impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for RequestHandler {
async fn generate(&self, input: SingleIn<String>) -> Result<ManyOut<Annotated<String>>> {
let (data, ctx) = input.into_parts();
// Track data bytes processed (custom metric)
if let Some(metrics) = &self.metrics {
metrics.data_bytes_processed.inc_by(data.len() as u64);
}
// Your business logic here...
Ok(ResponseStream::new(Box::pin(stream), ctx.context()))
}
}
// Create custom metrics and handler
let system_metrics = MySystemStatsMetrics::from_endpoint(&endpoint)?;
let handler = RequestHandler::with_metrics(system_metrics);
let ingress = Ingress::for_engine(handler)?;
// Add custom metrics IN ADDITION to automatic ones
// You'll get both: automatic metrics (requests_total, request_duration_seconds, etc.)
// AND custom metrics (my_custom_bytes_processed_total)
ingress.add_metrics(&endpoint)?;
```
Example output:
## Benefits
1. **Zero Code Changes**: Existing handlers automatically get profiling metrics
2. **Simple API**: Just create an Ingress and you get metrics automatically
3. **Optional Custom Metrics**: Add custom metrics when needed
4. **Automatic Profiling**: Request count, duration, and error tracking out of the box
5. **Automatic Labeling**: Endpoint provides proper namespace/component/endpoint labels
6. **Performance**: Minimal overhead, metrics are only recorded when provided
## Running the Example
**Important**: You must set the `DYN_SYSTEM_PORT` environment variable to specify which port the HTTP server will run on.
```bash
# Run the system metrics example
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 cargo run --bin system_server
```
# HELP service_request_duration_seconds Time spent processing requests
# TYPE service_request_duration_seconds histogram
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.005"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.01"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.025"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.05"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.1"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.25"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="0.5"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="1"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="2.5"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="5"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="10"} 2
service_request_duration_seconds_bucket{component="component",endpoint="endpoint",namespace="system",service="backend",le="+Inf"} 2
service_request_duration_seconds_sum{component="component",endpoint="endpoint",namespace="system",service="backend"} 0.000022239000000000002
service_request_duration_seconds_count{component="component",endpoint="endpoint",namespace="system",service="backend"} 2
# HELP service_requests_total Total number of requests processed
# TYPE service_requests_total counter
service_requests_total{component="component",endpoint="endpoint",namespace="system",service="backend"} 2
# HELP uptime_seconds Total uptime of the DistributedRuntime in seconds
# TYPE uptime_seconds gauge
uptime_seconds{namespace="http_server"} 725.997013676
The server will start an HTTP server on the specified port (8081 in this example) that exposes the Prometheus metrics endpoint at `/metrics`.
To run an actual LLM frontend and backend server (aggregated example), launch both of them. By default, the frontend listens on port 8080.
```
python -m dynamo.frontend &
## Configuration
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --no-enable-prefix-caching &
```
Then make curl requests to the frontend (see the [main README](../../../../README.md)).
| Variable | Description | Default |
|----------|-------------|---------|
| `DYN_LOG` | Enable logging | `0` |
| `DYN_SYSTEM_ENABLED` | Enable system metrics | `false` |
| `DYN_SYSTEM_PORT` | HTTP server port | `8081` |
## Querying Metrics
## Metrics
Once running, you can query the metrics:
- `service_requests_total`: Request counter
- `service_request_duration_seconds`: Request duration histogram
- `uptime_seconds`: Server uptime gauge
```bash
# Get all work handler metrics
curl http://localhost:8081/metrics | grep -E "(requests_total|request_bytes_total|response_bytes_total|errors_total|request_duration_seconds|concurrent_requests)"
This provides automatic context and grouping for all metrics without manual configuration.
# Get request count for specific endpoint
curl http://localhost:8081/metrics | grep 'requests_total{endpoint="dyn_example_endpoint"}'
## Troubleshooting
# Get request duration histogram
curl http://localhost:8081/metrics | grep 'request_duration_seconds'
- **Port in use**: Change `DYN_SYSTEM_PORT`
- **Connection refused**: Ensure server is running first
- **No metrics**: Verify `DYN_SYSTEM_ENABLED=true`
\ No newline at end of file
# Get custom system metrics
curl http://localhost:8081/metrics | grep 'my_custom_bytes_processed_total'
```
\ No newline at end of file
......@@ -14,7 +14,7 @@
// limitations under the License.
use futures::StreamExt;
use system_metrics::DEFAULT_NAMESPACE;
use system_metrics::{DEFAULT_COMPONENT, DEFAULT_ENDPOINT, DEFAULT_NAMESPACE};
use dynamo_runtime::{
logging, pipeline::PushRouter, protocols::annotated::Annotated, utils::Duration,
......@@ -31,9 +31,9 @@ async fn app(runtime: Runtime) -> Result<()> {
let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;
let namespace = distributed.namespace(DEFAULT_NAMESPACE)?;
let component = namespace.component("component")?;
let component = namespace.component(DEFAULT_COMPONENT)?;
let client = component.endpoint("endpoint").client().await?;
let client = component.endpoint(DEFAULT_ENDPOINT).client().await?;
client.wait_for_instances().await?;
let router =
......
......@@ -13,50 +13,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use system_metrics::{MyStats, DEFAULT_NAMESPACE};
use dynamo_runtime::{
logging,
metrics::MetricsRegistry,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
ResponseStream, SingleIn,
},
protocols::annotated::Annotated,
stream, DistributedRuntime, Result, Runtime, Worker,
};
use prometheus::{Counter, Histogram};
use std::sync::Arc;
/// Service metrics struct using the metric classes from metrics.rs
///
/// Bundles the two metrics that `RequestHandler::generate` records for every request.
pub struct MySystemStatsMetrics {
/// Incremented once at the start of each handled request.
pub request_counter: Arc<Counter>,
/// Receives the elapsed handling time of each request, in seconds.
pub request_duration: Arc<Histogram>,
}
impl MySystemStatsMetrics {
    /// Build a `MySystemStatsMetrics` by creating its metrics on `metrics_registry`.
    ///
    /// The request counter is created first, then the request-duration
    /// histogram; both carry the constant label `service="backend"`.
    ///
    /// # Errors
    ///
    /// Returns the registry's error if either metric cannot be created.
    pub fn new<R: MetricsRegistry>(
        metrics_registry: Arc<R>,
    ) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
        // Struct-literal fields are evaluated top to bottom, so the counter is
        // still created before the histogram.
        Ok(Self {
            request_counter: metrics_registry.create_counter(
                "service_requests_total",
                "Total number of requests processed",
                &[("service", "backend")],
            )?,
            request_duration: metrics_registry.create_histogram(
                "service_request_duration_seconds",
                "Time spent processing requests",
                &[("service", "backend")],
                None,
            )?,
        })
    }
}
use dynamo_runtime::{logging, DistributedRuntime, Result, Runtime, Worker};
use system_metrics::backend;
fn main() -> Result<()> {
logging::init();
......@@ -66,74 +24,5 @@ fn main() -> Result<()> {
async fn app(runtime: Runtime) -> Result<()> {
let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;
backend(distributed).await
}
/// Request handler that records service metrics while producing responses.
struct RequestHandler {
// Shared metrics bundle updated on every call to `generate`.
metrics: Arc<MySystemStatsMetrics>,
}
impl RequestHandler {
    /// Create a reference-counted handler that records into `metrics`.
    fn new(metrics: Arc<MySystemStatsMetrics>) -> Arc<Self> {
        let handler = Self { metrics };
        Arc::new(handler)
    }
}
#[async_trait]
impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for RequestHandler {
    /// Split the request string into characters and return them as a stream
    /// of single-character `Annotated` responses.
    ///
    /// The request counter is bumped on entry. The duration histogram records
    /// the time spent building the response stream; the stream has not been
    /// consumed yet when the observation is made.
    async fn generate(&self, input: SingleIn<String>) -> Result<ManyOut<Annotated<String>>> {
        let started = std::time::Instant::now();
        self.metrics.request_counter.inc();

        let (text, ctx) = input.into_parts();

        let mut pieces = Vec::new();
        for ch in text.chars() {
            pieces.push(Annotated::from_data(ch.to_string()));
        }
        let responses = stream::iter(pieces);

        let elapsed_secs = started.elapsed().as_secs_f64();
        self.metrics.request_duration.observe(elapsed_secs);

        Ok(ResponseStream::new(Box::pin(responses), ctx.context()))
    }
}
async fn backend(drt: DistributedRuntime) -> Result<()> {
let endpoint = drt
.namespace(DEFAULT_NAMESPACE)?
.component("component")?
.service_builder()
.create()
.await?
.endpoint("endpoint");
// make the ingress discoverable via a component service
// we must first create a service, then we can attach one or more endpoints
// attach an ingress to an engine, with the RequestHandler using the metrics struct
let endpoint_metrics = Arc::new(
MySystemStatsMetrics::new(Arc::new(endpoint.clone()))
.map_err(|e| Error::msg(e.to_string()))?,
);
let ingress = Ingress::for_engine(RequestHandler::new(endpoint_metrics.clone()))?;
endpoint
.endpoint_builder()
.stats_handler(|_stats| {
println!("Stats handler called with stats: {:?}", _stats);
let stats = MyStats { val: 10 };
serde_json::to_value(stats).unwrap()
})
.handler(ingress)
.start()
.await?;
Ok(())
backend(distributed, None).await
}
......@@ -13,12 +13,120 @@
// See the License for the specific language governing permissions and
// limitations under the License.
use serde::{Deserialize, Serialize};
use dynamo_runtime::{
metrics::MetricsRegistry,
pipeline::{
async_trait, network::Ingress, AsyncEngine, AsyncEngineContextProvider, Error, ManyOut,
ResponseStream, SingleIn,
},
protocols::annotated::Annotated,
stream, DistributedRuntime, Result,
};
use prometheus::IntCounter;
use std::sync::Arc;
pub const DEFAULT_NAMESPACE: &str = "system";
pub const DEFAULT_NAMESPACE: &str = "dyn_example_namespace";
pub const DEFAULT_COMPONENT: &str = "dyn_example_component";
pub const DEFAULT_ENDPOINT: &str = "dyn_example_endpoint";
#[derive(Serialize, Deserialize)]
// Dummy Stats object to demonstrate how to attach a custom stats handler
/// Stats structure returned by the endpoint's stats handler
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
pub struct MyStats {
pub val: u32,
// Example value for demonstration purposes
pub val: i32,
}
/// Custom metrics for system stats with data bytes tracking
///
/// Cloning is cheap: the counter is held behind an `Arc`, so clones share it.
#[derive(Clone, Debug)]
pub struct MySystemStatsMetrics {
/// Counter created by `from_endpoint` under the name `my_custom_bytes_processed_total`.
pub data_bytes_processed: Arc<IntCounter>,
}
impl MySystemStatsMetrics {
    /// Create the custom metrics on `endpoint`.
    ///
    /// Registers a single integer counter, `my_custom_bytes_processed_total`,
    /// with no extra labels.
    ///
    /// # Errors
    ///
    /// Returns the endpoint's error if the counter cannot be created.
    pub fn from_endpoint(
        endpoint: &dynamo_runtime::component::Endpoint,
    ) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
        let bytes_counter = endpoint.create_intcounter(
            "my_custom_bytes_processed_total",
            "Example of a custom metric. Total number of data bytes processed by system handler",
            &[],
        )?;

        Ok(Self {
            data_bytes_processed: bytes_counter,
        })
    }
}
/// Example request handler whose custom metrics are optional.
#[derive(Clone)]
pub struct RequestHandler {
// `None` when built with `new()`, `Some` when built with `with_metrics()`.
metrics: Option<Arc<MySystemStatsMetrics>>,
}
impl RequestHandler {
    /// Create a handler that records no custom metrics.
    pub fn new() -> Arc<Self> {
        let handler = Self { metrics: None };
        Arc::new(handler)
    }

    /// Create a handler that records into the given custom `metrics`.
    pub fn with_metrics(metrics: MySystemStatsMetrics) -> Arc<Self> {
        let shared_metrics = Arc::new(metrics);
        let handler = Self {
            metrics: Some(shared_metrics),
        };
        Arc::new(handler)
    }
}
#[async_trait]
impl AsyncEngine<SingleIn<String>, ManyOut<Annotated<String>>, Error> for RequestHandler {
    /// Split the request string into characters and return them as a stream
    /// of single-character `Annotated` responses.
    ///
    /// When custom metrics are attached, the byte length of the request string
    /// is added to `data_bytes_processed` before the response is built.
    async fn generate(&self, input: SingleIn<String>) -> Result<ManyOut<Annotated<String>>> {
        let (text, ctx) = input.into_parts();

        // `len()` is the UTF-8 byte length, not the character count.
        if let Some(custom) = self.metrics.as_deref() {
            custom.data_bytes_processed.inc_by(text.len() as u64);
        }

        let mut pieces = Vec::new();
        for ch in text.chars() {
            pieces.push(Annotated::from_data(ch.to_string()));
        }
        let responses = stream::iter(pieces);

        Ok(ResponseStream::new(Box::pin(responses), ctx.context()))
    }
}
/// Backend function that sets up the system server with metrics and ingress handler
/// This function can be reused by integration tests to ensure they use the exact same setup
pub async fn backend(drt: DistributedRuntime, endpoint_name: Option<&str>) -> Result<()> {
let endpoint_name = endpoint_name.unwrap_or(DEFAULT_ENDPOINT);
let endpoint = drt
.namespace(DEFAULT_NAMESPACE)?
.component(DEFAULT_COMPONENT)?
.service_builder()
.create()
.await?
.endpoint(endpoint_name);
// Create custom metrics for system stats
let system_metrics =
MySystemStatsMetrics::from_endpoint(&endpoint).expect("Failed to create system metrics");
// Use the factory pattern - single line factory call with metrics
let ingress = Ingress::for_engine(RequestHandler::with_metrics(system_metrics))?;
endpoint
.endpoint_builder()
.stats_handler(|_stats| {
println!("Stats handler called with stats: {:?}", _stats);
// TODO(keivenc): return a real stats object
let stats = MyStats { val: 10 };
serde_json::to_value(stats).unwrap()
})
.handler(ingress)
.start()
.await?;
Ok(())
}
// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#![cfg(feature = "integration")]
use dynamo_runtime::{
pipeline::PushRouter, protocols::annotated::Annotated, DistributedRuntime, Result, Runtime,
};
use futures::StreamExt;
use rand::Rng;
use reqwest;
use std::env;
use system_metrics::{backend, DEFAULT_COMPONENT, DEFAULT_ENDPOINT, DEFAULT_NAMESPACE};
use tokio::time::{sleep, Duration};
/// End-to-end check: start the example backend, send it a few requests, then
/// scrape its `/metrics` endpoint and verify the ingress metrics are non-zero.
#[tokio::test]
async fn test_backend_with_metrics() -> Result<()> {
    // Set environment variables for dynamic port allocation
    env::set_var("DYN_SYSTEM_ENABLED", "true");
    env::set_var("DYN_SYSTEM_PORT", "0");

    // Generate a random endpoint name to avoid collisions
    let random_suffix = rand::rng().random_range(1000..9999);
    let test_endpoint = format!("{}{}", DEFAULT_ENDPOINT, random_suffix);

    // Initialize logging
    dynamo_runtime::logging::init();

    // Create a runtime and distributed runtime for the backend
    let runtime = Runtime::from_current()?;
    let distributed = DistributedRuntime::from_settings(runtime.clone()).await?;

    // Get the HTTP server info to find the actual port
    let Some(http_server_info) = distributed.http_server_info() else {
        panic!("HTTP server not started - check DYN_SYSTEM_ENABLED environment variable");
    };
    println!("HTTP server running on: {}", http_server_info.address());
    let metrics_port = http_server_info.port();

    // Start the backend in a separate task with custom endpoint
    let test_endpoint_clone = test_endpoint.clone();
    let backend_handle =
        tokio::spawn(async move { backend(distributed, Some(&test_endpoint_clone)).await });

    // Give the backend some time to start up
    sleep(Duration::from_millis(1000)).await;

    // Create a client runtime to connect to the backend
    let client_runtime = Runtime::from_current()?;
    let client_distributed = DistributedRuntime::from_settings(client_runtime.clone()).await?;

    // Connect to the backend similar to system_client.rs
    let namespace = client_distributed.namespace(DEFAULT_NAMESPACE)?;
    let component = namespace.component(DEFAULT_COMPONENT)?;
    let client = component.endpoint(&test_endpoint).client().await?;

    // Wait for backend instances to be available
    client.wait_for_instances().await?;

    // Create a router and send some requests to generate metrics
    let router =
        PushRouter::<String, Annotated<String>>::from_client(client, Default::default()).await?;

    // Send a few test requests to generate metrics
    for i in 0..3 {
        let test_message = format!("test message {}", i);
        let mut stream = router.random(test_message.into()).await?;

        // Process the response stream
        while let Some(resp) = stream.next().await {
            println!("Response {}: {:?}", i, resp);
        }

        // Small delay between requests
        sleep(Duration::from_millis(100)).await;
    }

    // Give some time for metrics to be updated
    sleep(Duration::from_millis(500)).await;

    // Now fetch the HTTP metrics endpoint using the dynamic port
    let metrics_url = format!("http://localhost:{}/metrics", metrics_port);
    println!("Fetching metrics from: {}", metrics_url);

    // Make HTTP request to get metrics; every failure mode panics with its own
    // message so the real cause is visible in the test output.
    let response = reqwest::Client::new()
        .get(&metrics_url)
        .send()
        .await
        .unwrap_or_else(|e| panic!("Failed to connect to metrics endpoint: {}", e));
    assert!(
        response.status().is_success(),
        "Failed to get metrics: HTTP {}",
        response.status()
    );

    // A body that cannot be read is a failure in its own right; do not feed
    // placeholder text into the metric verification below.
    let metrics_content = response
        .text()
        .await
        .unwrap_or_else(|e| panic!("Failed to read metrics response body: {}", e));

    println!("=== METRICS CONTENT ===");
    println!("{}", metrics_content);
    println!("=== END METRICS CONTENT ===");

    // Parse and verify ingress metrics are greater than 0 (except concurrent_requests)
    verify_ingress_metrics_greater_than_0(&metrics_content);

    println!("Successfully retrieved and verified metrics!");

    // Shutdown the runtime
    client_runtime.shutdown();

    // Cancel the backend task
    backend_handle.abort();

    Ok(())
}
/// Assert that every expected work-handler metric appears in `metrics_content`
/// (Prometheus text exposition format) with a value greater than zero.
///
/// For each expected name, the first sample line whose metric name contains it
/// is checked. Only the name portion of a line (the text before `{` or the
/// first whitespace) is matched, so label values can neither produce a false
/// match nor cause a sample line to be mistaken for a comment.
///
/// # Panics
///
/// Panics if a metric is missing or its value is not greater than zero.
fn verify_ingress_metrics_greater_than_0(metrics_content: &str) {
    // Define the work handler metrics we want to verify (excluding concurrent_requests which can be 0)
    let metrics_to_verify = [
        "my_custom_bytes_processed_total",
        "requests_total",
        "request_bytes_total",
        "response_bytes_total",
        "request_duration_seconds_count",
        "request_duration_seconds_sum",
    ];

    for metric_name in metrics_to_verify.iter().copied() {
        let line = metrics_content
            .lines()
            .map(str::trim_start)
            // Comment lines (`# HELP`, `# TYPE`) start with '#'; a '#' inside a
            // label value does not make a line a comment.
            .filter(|l| !l.is_empty() && !l.starts_with('#'))
            .find(|l| {
                let name_end = l
                    .find(|c: char| c == '{' || c.is_whitespace())
                    .unwrap_or(l.len());
                l[..name_end].contains(metric_name)
            })
            .unwrap_or_else(|| panic!("{} metric not found", metric_name));

        let value = extract_metric_value(line);
        assert!(
            value > 0.0,
            "{} should be greater than 0, got: {}",
            metric_name,
            value
        );
        println!("{}: {}", metric_name, value);
    }

    println!("All work handler metrics verified successfully!");
}
/// Extract the numeric sample value from one Prometheus text-format line.
///
/// Line format: `metric_name{labels} value [timestamp]`, where the label set
/// and the timestamp are both optional. The value is the first token after the
/// label set (or after the metric name when there are no labels), so a
/// trailing timestamp is not mistaken for the value and whitespace inside
/// quoted label values is skipped.
///
/// # Panics
///
/// Panics if the line has no value token or the token is not a valid `f64`.
fn extract_metric_value(line: &str) -> f64 {
    let after_name_and_labels = match line.rfind('}') {
        // Everything after the closing brace of the label set.
        Some(close) => &line[close + 1..],
        // No labels: drop the metric name, keep the rest.
        None => line
            .trim_start()
            .split_once(char::is_whitespace)
            .map_or("", |(_, rest)| rest),
    };

    after_name_and_labels
        .split_whitespace()
        .next()
        .expect("Metric line should have a value")
        .parse::<f64>()
        .expect("Metric value should be a valid number")
}
......@@ -69,6 +69,9 @@ impl EndpointConfigBuilder {
// acquire the registry lock
let registry = endpoint.drt().component_registry.inner.lock().await;
// Add metrics to the handler. The endpoint provides additional information to the handler.
handler.add_metrics(&endpoint)?;
// get the group
let group = registry
.services
......
......@@ -24,6 +24,7 @@ use crate::{
};
use super::{error, Arc, DistributedRuntime, OnceCell, Result, Runtime, SystemHealth, Weak, OK};
use std::sync::OnceLock;
use derive_getters::Dissolve;
use figment::error;
......@@ -97,6 +98,7 @@ impl DistributedRuntime {
etcd_client,
nats_client,
tcp_server: Arc::new(OnceCell::new()),
http_server: Arc::new(OnceLock::new()),
component_registry: component::Registry::new(),
is_static,
instance_sources: Arc::new(Mutex::new(HashMap::new())),
......@@ -121,8 +123,18 @@ impl DistributedRuntime {
)
.await
{
Ok((addr, _)) => {
Ok((addr, handle)) => {
tracing::info!("HTTP server started successfully on {}", addr);
// Store HTTP server information
let http_server_info =
crate::http_server::HttpServerInfo::new(addr, Some(handle));
// Initialize the http_server field
distributed_runtime
.http_server
.set(Arc::new(http_server_info))
.expect("HTTP server info should only be set once");
}
Err(e) => {
tracing::error!("HTTP server startup failed: {}", e);
......@@ -210,6 +222,11 @@ impl DistributedRuntime {
self.nats_client.clone()
}
/// Return a shared handle to the HTTP server information, or `None` when the
/// `http_server` cell has not been populated.
pub fn http_server_info(&self) -> Option<Arc<crate::http_server::HttpServerInfo>> {
    self.http_server.get().map(Arc::clone)
}
// todo(ryan): deprecate this as we move to Discovery traits and Component Identifiers
pub fn etcd_client(&self) -> Option<etcd::Client> {
self.etcd_client.clone()
......
......@@ -22,10 +22,47 @@ use std::collections::HashMap;
use std::sync::Arc;
use std::sync::OnceLock;
use std::time::Instant;
use tokio::net::TcpListener;
use tokio::{net::TcpListener, task::JoinHandle};
use tokio_util::sync::CancellationToken;
use tracing;
/// HTTP server information containing socket address and handle
///
/// Cloning copies the socket address and shares the task handle through its
/// `Arc`, so all clones refer to the same server task.
#[derive(Clone, Debug)]
pub struct HttpServerInfo {
    /// Address the server is bound to.
    pub socket_addr: std::net::SocketAddr,
    /// Join handle of the server task, if one was supplied at construction.
    pub handle: Option<Arc<JoinHandle<()>>>,
}

impl HttpServerInfo {
    /// Create server info from a bound address and an optional task handle.
    ///
    /// The handle is wrapped in an `Arc` so the info can be cloned.
    pub fn new(socket_addr: std::net::SocketAddr, handle: Option<JoinHandle<()>>) -> Self {
        Self {
            socket_addr,
            handle: handle.map(Arc::new),
        }
    }

    /// Full socket address rendered as a string (IP and port).
    pub fn address(&self) -> String {
        self.socket_addr.to_string()
    }

    /// IP part of the socket address rendered as a string.
    ///
    /// Despite the name, no name resolution happens; this is the textual IP.
    pub fn hostname(&self) -> String {
        self.socket_addr.ip().to_string()
    }

    /// Port part of the socket address.
    pub fn port(&self) -> u16 {
        self.socket_addr.port()
    }
}
/// Registry wrapper used by the HTTP server to create its own metrics.
///
/// `HttpServerState::new` builds one of these around the runtime and calls
/// `create_gauge` on it for the uptime gauge.
pub struct HttpMetricsRegistry {
/// The distributed runtime this registry is attached to.
pub drt: Arc<crate::DistributedRuntime>,
}
......@@ -58,8 +95,10 @@ impl HttpServerState {
/// Create new HTTP server state with the provided metrics registry
pub fn new(drt: Arc<crate::DistributedRuntime>) -> anyhow::Result<Self> {
let http_metrics_registry = Arc::new(HttpMetricsRegistry { drt: drt.clone() });
// Note: This metric is created at the DRT level (no namespace), so we manually add "dynamo_" prefix
// to maintain consistency with the project's metric naming convention
let uptime_gauge = http_metrics_registry.as_ref().create_gauge(
"uptime_seconds",
"dynamo_uptime_seconds",
"Total uptime of the DistributedRuntime in seconds",
&[],
)?;
......@@ -293,9 +332,9 @@ mod tests {
println!("Full metrics response:\n{}", response);
let expected = "\
# HELP uptime_seconds Total uptime of the DistributedRuntime in seconds
# TYPE uptime_seconds gauge
uptime_seconds{namespace=\"http_server\"} 42
# HELP dynamo_uptime_seconds Total uptime of the DistributedRuntime in seconds
# TYPE dynamo_uptime_seconds gauge
dynamo_uptime_seconds{namespace=\"http_server\"} 42
";
assert_eq!(response, expected);
}
......@@ -405,6 +444,9 @@ uptime_seconds{namespace=\"http_server\"} 42
#[tokio::test]
async fn test_spawn_http_server_endpoints() {
// use reqwest for HTTP requests
temp_env::async_with_vars(
[("DYN_SYSTEM_STARTING_HEALTH_STATUS", Some("ready"))],
async {
let cancel_token = CancellationToken::new();
let drt = create_test_drt_async().await;
let (addr, server_handle) =
......@@ -416,8 +458,8 @@ uptime_seconds{namespace=\"http_server\"} 42
println!("[test] Server should be up, starting requests...");
let client = reqwest::Client::new();
for (path, expect_200, expect_body) in [
("/health", true, "OK"),
("/live", true, "OK"),
("/health", true, "ready"),
("/live", true, "ready"),
("/someRandomPathNotFoundHere", false, "Route not found"),
] {
println!("[test] Sending request to {}", path);
......@@ -452,6 +494,9 @@ uptime_seconds{namespace=\"http_server\"} 42
}
}
}
},
)
.await;
}
#[cfg(feature = "integration")]
......
......@@ -20,7 +20,7 @@
use std::{
collections::HashMap,
sync::{Arc, Weak},
sync::{Arc, OnceLock, Weak},
};
use tokio::sync::Mutex;
......@@ -37,6 +37,7 @@ pub mod component;
pub mod discovery;
pub mod engine;
pub mod http_server;
pub use http_server::HttpServerInfo;
pub mod logging;
pub mod metrics;
pub mod pipeline;
......@@ -150,6 +151,7 @@ pub struct DistributedRuntime {
etcd_client: Option<transports::etcd::Client>,
nats_client: transports::nats::Client,
tcp_server: Arc<OnceCell<Arc<transports::tcp::server::TcpStreamServer>>>,
http_server: Arc<OnceLock<Arc<http_server::HttpServerInfo>>>,
// local registry for components
// the registry allows us to use share runtime resources across instances of the same component object.
......
This diff is collapsed.
......@@ -36,6 +36,11 @@ use super::{
context, AsyncTransportEngine, Context, Data, Error, ManyOut, PipelineError, PipelineIO,
SegmentSource, ServiceBackend, ServiceEngine, SingleIn, Source,
};
use ingress::push_handler::WorkHandlerMetrics;
// Add Prometheus metrics types
use crate::metrics::MetricsRegistry;
use prometheus::{CounterVec, Histogram, IntCounter, IntCounterVec, IntGauge};
pub trait Codable: PipelineIO + Serialize + for<'de> Deserialize<'de> {}
impl<T: PipelineIO + Serialize + for<'de> Deserialize<'de>> Codable for T {}
......@@ -278,12 +283,14 @@ struct RequestControlMessage {
/// Ingress side of a pipeline: holds the segment that serves requests and,
/// optionally, the work-handler metrics recorded while serving them.
///
/// Both fields are write-once cells, filled by `attach` and `add_metrics`.
pub struct Ingress<Req: PipelineIO, Resp: PipelineIO> {
// Set at most once; a second attempt fails with "Segment already set".
segment: OnceLock<Arc<SegmentSource<Req, Resp>>>,
// Set at most once by `add_metrics`; empty means no metrics are recorded.
metrics: OnceLock<Arc<WorkHandlerMetrics>>,
}
impl<Req: PipelineIO + Sync, Resp: PipelineIO> Ingress<Req, Resp> {
/// Create an empty ingress with neither a segment nor metrics installed.
pub fn new() -> Arc<Self> {
    let ingress = Self {
        segment: OnceLock::new(),
        metrics: OnceLock::new(),
    };
    Arc::new(ingress)
}
......@@ -293,6 +300,15 @@ impl<Req: PipelineIO + Sync, Resp: PipelineIO> Ingress<Req, Resp> {
.map_err(|_| anyhow::anyhow!("Segment already set"))
}
/// Create the work-handler metrics on `endpoint` and install them on this ingress.
///
/// # Errors
///
/// Fails with "Failed to create work handler metrics: ..." when the metrics
/// cannot be created, and with "Metrics already set" when metrics were already
/// installed. The metrics are created before the already-set check runs.
pub fn add_metrics(&self, endpoint: &crate::component::Endpoint) -> Result<()> {
    let handler_metrics = match WorkHandlerMetrics::from_endpoint(endpoint) {
        Ok(created) => Arc::new(created),
        Err(e) => {
            return Err(anyhow::anyhow!(
                "Failed to create work handler metrics: {}",
                e
            ))
        }
    };

    if self.metrics.set(handler_metrics).is_err() {
        return Err(anyhow::anyhow!("Metrics already set"));
    }
    Ok(())
}
pub fn link(segment: Arc<SegmentSource<Req, Resp>>) -> Result<Arc<Self>> {
let ingress = Ingress::new();
ingress.attach(segment)?;
......@@ -317,11 +333,19 @@ impl<Req: PipelineIO + Sync, Resp: PipelineIO> Ingress<Req, Resp> {
Ok(ingress)
}
/// Borrow the installed work-handler metrics, or `None` if none were added.
fn metrics(&self) -> Option<&Arc<WorkHandlerMetrics>> {
    let installed = &self.metrics;
    installed.get()
}
}
/// Handler for raw payloads pushed to an endpoint.
///
/// The endpoint builder calls `add_metrics` on the handler with the endpoint
/// it is being attached to.
#[async_trait]
pub trait PushWorkHandler: Send + Sync {
/// Process one raw request payload.
async fn handle_payload(&self, payload: Bytes) -> Result<(), PipelineError>;
/// Add metrics to the handler
fn add_metrics(&self, endpoint: &crate::component::Endpoint) -> Result<()>;
}
/*
......
......@@ -14,7 +14,92 @@
// limitations under the License.
use super::*;
use prometheus::{Histogram, IntCounter, IntCounterVec, IntGauge};
use serde::{Deserialize, Serialize};
use std::sync::Arc;
/// Metrics configuration for profiling work handlers
///
/// Field descriptions mirror the help strings used in `from_endpoint`.
#[derive(Clone, Debug)]
pub struct WorkHandlerMetrics {
/// `requests_total`: total number of requests processed.
pub request_counter: Arc<IntCounter>,
/// `request_duration_seconds`: time spent processing requests.
pub request_duration: Arc<Histogram>,
/// `concurrent_requests`: number of requests currently being processed.
pub concurrent_requests: Arc<IntGauge>,
/// `request_bytes_total`: total number of bytes received in requests.
pub request_bytes: Arc<IntCounter>,
/// `response_bytes_total`: total number of bytes sent in responses.
pub response_bytes: Arc<IntCounter>,
/// `errors_total`: total number of errors, partitioned by the `error_type` label.
pub error_counter: Arc<IntCounterVec>,
}
impl WorkHandlerMetrics {
    /// Assemble a metrics bundle from already-created metric handles.
    pub fn new(
        request_counter: Arc<IntCounter>,
        request_duration: Arc<Histogram>,
        concurrent_requests: Arc<IntGauge>,
        request_bytes: Arc<IntCounter>,
        response_bytes: Arc<IntCounter>,
        error_counter: Arc<IntCounterVec>,
    ) -> Self {
        Self {
            request_counter,
            request_duration,
            concurrent_requests,
            request_bytes,
            response_bytes,
            error_counter,
        }
    }

    /// Create WorkHandlerMetrics from an endpoint using its built-in labeling
    ///
    /// Metrics are created on `endpoint` in this order: `requests_total`,
    /// `request_duration_seconds`, `concurrent_requests`,
    /// `request_bytes_total`, `response_bytes_total`, `errors_total`.
    ///
    /// # Errors
    ///
    /// Returns the endpoint's error as soon as any metric cannot be created.
    pub fn from_endpoint(
        endpoint: &crate::component::Endpoint,
    ) -> Result<Self, Box<dyn std::error::Error + Send + Sync>> {
        // Struct-literal fields are evaluated top to bottom, which keeps the
        // creation order listed above.
        Ok(Self {
            request_counter: endpoint.create_intcounter(
                "requests_total",
                "Total number of requests processed by work handler",
                &[],
            )?,
            request_duration: endpoint.create_histogram(
                "request_duration_seconds",
                "Time spent processing requests by work handler",
                &[],
                None,
            )?,
            concurrent_requests: endpoint.create_intgauge(
                "concurrent_requests",
                "Number of requests currently being processed by work handler",
                &[],
            )?,
            request_bytes: endpoint.create_intcounter(
                "request_bytes_total",
                "Total number of bytes received in requests by work handler",
                &[],
            )?,
            response_bytes: endpoint.create_intcounter(
                "response_bytes_total",
                "Total number of bytes sent in responses by work handler",
                &[],
            )?,
            error_counter: endpoint.create_intcountervec(
                "errors_total",
                "Total number of errors in work handler processing",
                &["error_type"],
                &[],
            )?,
        })
    }
}
#[async_trait]
impl<T: Data, U: Data> PushWorkHandler for Ingress<SingleIn<T>, ManyOut<U>>
......@@ -22,7 +107,21 @@ where
T: Data + for<'de> Deserialize<'de> + std::fmt::Debug,
U: Data + Serialize + std::fmt::Debug,
{
/// Forward to the inherent `Ingress::add_metrics`, which creates the
/// work-handler metrics on `endpoint` and installs them.
fn add_metrics(&self, endpoint: &crate::component::Endpoint) -> Result<()> {
    // Fully qualified so the inherent method is called, not this trait method.
    crate::pipeline::network::Ingress::add_metrics(self, endpoint)
}
async fn handle_payload(&self, payload: Bytes) -> Result<(), PipelineError> {
let start_time = std::time::Instant::now();
if let Some(m) = self.metrics() {
m.request_counter.inc();
m.concurrent_requests.inc();
m.request_bytes.inc_by(payload.len() as u64);
}
// decode the control message and the request
let msg = TwoPartCodec::default()
.decode_message(payload)?
......@@ -41,6 +140,11 @@ where
Ok(cm) => cm,
Err(err) => {
let json_str = String::from_utf8_lossy(&header);
if let Some(m) = self.metrics() {
m.error_counter
.with_label_values(&["deserialization"])
.inc();
}
return Err(PipelineError::DeserializationError(
format!("Failed deserializing to RequestControlMessage. err={err}, json_str={json_str}"),
));
......@@ -50,6 +154,11 @@ where
(control_msg, request)
}
_ => {
if let Some(m) = self.metrics() {
m.error_counter
.with_label_values(&["invalid_message"])
.inc();
}
return Err(PipelineError::Generic(String::from("Unexpected message from work queue; unable extract a TwoPartMessage with a header and data")));
}
};
......@@ -68,6 +177,11 @@ where
)
.await
.map_err(|e| {
if let Some(m) = self.metrics() {
m.error_counter
.with_label_values(&["response_stream"])
.inc();
}
PipelineError::Generic(format!("Failed to create response stream: {:?}", e,))
})?;
......@@ -78,7 +192,12 @@ where
.expect("segment not set")
.generate(request)
.await
.map_err(PipelineError::GenerateError);
.map_err(|e| {
if let Some(m) = self.metrics() {
m.error_counter.with_label_values(&["generate"]).inc();
}
PipelineError::GenerateError(e)
});
// the prolouge is sent to the client to indicate that the stream is ready to receive data
// or if the generate call failed, the error is sent to the client
......@@ -107,10 +226,18 @@ where
};
let resp_bytes = serde_json::to_vec(&resp_wrapper)
.expect("fatal error: invalid response object - this should never happen");
if let Some(m) = self.metrics() {
m.response_bytes.inc_by(resp_bytes.len() as u64);
}
if (publisher.send(resp_bytes.into()).await).is_err() {
tracing::error!("Failed to publish response for stream {}", context.id());
context.stop_generating();
send_complete_final = false;
if let Some(m) = self.metrics() {
m.error_counter
.with_label_values(&["publish_response"])
.inc();
}
break;
}
}
......@@ -121,13 +248,25 @@ where
};
let resp_bytes = serde_json::to_vec(&resp_wrapper)
.expect("fatal error: invalid response object - this should never happen");
if let Some(m) = self.metrics() {
m.response_bytes.inc_by(resp_bytes.len() as u64);
}
if (publisher.send(resp_bytes.into()).await).is_err() {
tracing::error!(
"Failed to publish complete final for stream {}",
context.id()
);
if let Some(m) = self.metrics() {
m.error_counter.with_label_values(&["publish_final"]).inc();
}
}
}
if let Some(m) = self.metrics() {
let duration = start_time.elapsed();
m.request_duration.observe(duration.as_secs_f64());
m.concurrent_requests.dec();
}
Ok(())
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment