Unverified commit 6bccf099, authored by Keiven C and committed by GitHub
Browse files

feat: deprecate DYN_SYSTEM_ENABLED in favor of DYN_SYSTEM_PORT (#4082)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent d1cf3c2c
......@@ -251,6 +251,16 @@ async def async_main():
dump_config(flags.dump_config_to, flags)
is_static = bool(flags.static_endpoint) # true if the string has a value
# Warn if DYN_SYSTEM_PORT is set (frontend doesn't use system metrics server)
if os.environ.get("DYN_SYSTEM_PORT"):
logger.warning(
"=" * 80 + "\n"
"WARNING: DYN_SYSTEM_PORT is set but NOT used by the frontend!\n"
"The frontend does not expose a system metrics server.\n"
"Only backend workers should set DYN_SYSTEM_PORT.\n"
"Use --http-port to configure the frontend HTTP API port.\n" + "=" * 80
)
# Configure Dynamo frontend HTTP service metrics prefix
if flags.metrics_prefix is not None:
prefix = flags.metrics_prefix.strip()
......
......@@ -357,13 +357,13 @@ python -m dynamo.frontend &
# 4. Start worker backend (choose one framework):
# vLLM
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 --enforce-eager --no-enable-prefix-caching --max-num-seqs 64 &
DYN_SYSTEM_PORT=8081 python -m dynamo.vllm --model Qwen/Qwen3-0.6B --gpu-memory-utilization 0.20 --enforce-eager --no-enable-prefix-caching --max-num-seqs 64 &
# SGLang
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 python -m dynamo.sglang --model Qwen/Qwen3-0.6B --mem-fraction-static 0.20 --max-running-requests 64 &
DYN_SYSTEM_PORT=8081 python -m dynamo.sglang --model Qwen/Qwen3-0.6B --mem-fraction-static 0.20 --max-running-requests 64 &
# TensorRT-LLM
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 python -m dynamo.trtllm --model Qwen/Qwen3-0.6B --free-gpu-memory-fraction 0.20 --max-num-tokens 8192 --max-batch-size 64 &
DYN_SYSTEM_PORT=8081 python -m dynamo.trtllm --model Qwen/Qwen3-0.6B --free-gpu-memory-fraction 0.20 --max-num-tokens 8192 --max-batch-size 64 &
```
**Framework-Specific GPU Memory Arguments:**
......
......@@ -135,9 +135,8 @@ The operator automatically injects environment variables based on component type
### Worker Components
- **`DYN_SYSTEM_ENABLED`**: `true`
- **`DYN_SYSTEM_PORT`**: `9090` (automatically enables the system metrics server)
- **`DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS`**: `["generate"]`
- **`DYN_SYSTEM_PORT`**: `9090`
### Planner Components
......
......@@ -827,7 +827,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
{Name: "DYN_NAMESPACE", Value: "default"},
{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-lws-deploy"},
{Name: "DYN_PARENT_DGD_K8S_NAMESPACE", Value: "default"},
{Name: "DYN_SYSTEM_ENABLED", Value: "true"},
{Name: "DYN_SYSTEM_PORT", Value: "9090"},
{Name: "DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS", Value: "[\"generate\"]"},
{Name: "TEST_ENV_FROM_DYNAMO_COMPONENT_DEPLOYMENT_SPEC", Value: "test_value_from_dynamo_component_deployment_spec"},
......@@ -941,7 +940,6 @@ func TestDynamoComponentDeploymentReconciler_generateLeaderWorkerSet(t *testing.
{Name: "DYN_NAMESPACE", Value: "default"},
{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-lws-deploy"},
{Name: "DYN_PARENT_DGD_K8S_NAMESPACE", Value: "default"},
{Name: "DYN_SYSTEM_ENABLED", Value: "true"},
{Name: "DYN_SYSTEM_PORT", Value: "9090"},
{Name: "DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS", Value: "[\"generate\"]"},
{Name: "TEST_ENV_FROM_DYNAMO_COMPONENT_DEPLOYMENT_SPEC", Value: "test_value_from_dynamo_component_deployment_spec"},
......
......@@ -74,10 +74,6 @@ func (w *WorkerDefaults) GetBaseContainer(context ComponentContext) (corev1.Cont
}
container.Env = append(container.Env, []corev1.EnvVar{
{
Name: "DYN_SYSTEM_ENABLED",
Value: "true",
},
{
Name: "DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS",
Value: "[\"generate\"]",
......
......@@ -1818,10 +1818,6 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
},
{
Name: "DYN_SYSTEM_ENABLED",
Value: "true",
},
{
Name: "DYN_SYSTEM_PORT",
Value: "9090",
......@@ -1971,10 +1967,6 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
},
{
Name: "DYN_SYSTEM_ENABLED",
Value: "true",
},
{
Name: "DYN_SYSTEM_PORT",
Value: "9090",
......@@ -2623,10 +2615,6 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
},
{
Name: "DYN_SYSTEM_ENABLED",
Value: "true",
},
{
Name: "DYN_SYSTEM_PORT",
Value: "9090",
......@@ -2763,10 +2751,6 @@ func TestGenerateGrovePodCliqueSet(t *testing.T) {
Name: "DYNAMO_POD_GANG_SET_REPLICAS",
Value: "1",
},
{
Name: "DYN_SYSTEM_ENABLED",
Value: "true",
},
{
Name: "DYN_SYSTEM_PORT",
Value: "9090",
......@@ -4595,7 +4579,6 @@ func TestGenerateBasePodSpec_Worker(t *testing.T) {
{Name: "DYN_NAMESPACE", Value: ""},
{Name: "DYN_PARENT_DGD_K8S_NAME", Value: "test-deployment"},
{Name: "DYN_PARENT_DGD_K8S_NAMESPACE", Value: "default"},
{Name: "DYN_SYSTEM_ENABLED", Value: "true"},
{Name: "DYN_SYSTEM_PORT", Value: "9090"},
{Name: "DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS", Value: "[\"generate\"]"},
},
......
......@@ -113,8 +113,6 @@ spec:
- name: DYN_HTTP_PORT
value: "{{ $.Values.dynamoPort | default 8000 }}"
{{- else if $serviceSpec.componentType | eq "worker" }}
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_PORT
value: "{{ $.Values.dynamoSystemPort | default 9090 }}"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
......
......@@ -105,8 +105,6 @@ spec:
- name: DYN_HTTP_PORT
value: "{{ $.Values.dynamoPort | default 8000 }}"
{{- else if $serviceSpec.componentType | eq "worker" }}
- name: DYN_SYSTEM_ENABLED
value: "true"
- name: DYN_SYSTEM_PORT
value: "{{ $.Values.dynamoSystemPort | default 9090 }}"
- name: DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS
......
......@@ -41,7 +41,7 @@ scrape_configs:
static_configs:
- targets: ['host.docker.internal:8000'] # on the "monitoring" network
# Launch via: DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 dynamo.<backend> ...
# Launch via: DYN_SYSTEM_PORT=8081 dynamo.<backend> ...
# If you want to update the scrape_interval, you may want to also update component.rs's MAX_DELAY
- job_name: 'dynamo-backend'
scrape_interval: 6s
......
......@@ -43,8 +43,7 @@ To see the actual metrics available in your SGLang version:
### 1. Launch SGLang with Metrics Enabled
```bash
# Set environment variables
export DYN_SYSTEM_ENABLED=true
# Set system metrics port (automatically enables metrics server)
export DYN_SYSTEM_PORT=8081
# Start SGLang worker with metrics enabled
......
......@@ -63,8 +63,7 @@ To see the actual metrics available in your TensorRT-LLM version:
### 1. Launch TensorRT-LLM with Metrics Enabled
```bash
# Set environment variables
export DYN_SYSTEM_ENABLED=true
# Set system metrics port (automatically enables metrics server)
export DYN_SYSTEM_PORT=8081
# Start TensorRT-LLM worker with metrics enabled
......
......@@ -44,8 +44,7 @@ To see the actual metrics available in your vLLM version:
### 1. Launch vLLM with Metrics Enabled
```bash
# Set environment variables
export DYN_SYSTEM_ENABLED=true
# Set system metrics port (automatically enables metrics server)
export DYN_SYSTEM_PORT=8081
# Start vLLM worker (metrics enabled by default via --disable-log-stats=false)
......
......@@ -618,9 +618,8 @@ The operator automatically injects environment variables based on component type
### Worker Components
- **`DYN_SYSTEM_ENABLED`**: `true`
- **`DYN_SYSTEM_PORT`**: `9090` (automatically enables the system metrics server)
- **`DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS`**: `["generate"]`
- **`DYN_SYSTEM_PORT`**: `9090`
### Planner Components
......
......@@ -128,8 +128,7 @@ when initializing and HTTP status code `HTTP/1.1 200 OK` once ready.
| **Environment Variable** | **Description** | **Example Settings** |
| -------------------------| ------------------- | ------------------------------------------------ |
| `DYN_SYSTEM_ENABLED` | Enables the system status server. | `true`, `false` |
| `DYN_SYSTEM_PORT` | Specifies the port for the system status server. | `9090` |
| `DYN_SYSTEM_PORT` | Specifies the port for the system status server (automatically enables it when set to a positive value). | `9090`, `8081` |
| `DYN_SYSTEM_STARTING_HEALTH_STATUS` | Sets the initial health status of the system (ready/not ready). | `ready`, `notready` |
| `DYN_SYSTEM_HEALTH_PATH` | Custom path for the health endpoint. | `/custom/health` |
| `DYN_SYSTEM_LIVE_PATH` | Custom path for the liveness endpoint. | `/custom/live` |
......@@ -138,10 +137,9 @@ when initializing and HTTP status code `HTTP/1.1 200 OK` once ready.
### Example Environment Setting
```
export DYN_SYSTEM_ENABLED="true"
export DYN_SYSTEM_PORT=9090
export DYN_SYSTEM_STARTING_HEALTH_STATUS="notready"
export DYN_SYSTEM_USE_ENDPOINT_HEALTH_STATUS="[\"generate\"]"
export DYN_SYSTEM_PORT=9090
```
#### Example Request
......
......@@ -214,7 +214,6 @@ The following shows the JSONL logs from the frontend service for the same reques
```
{"time":"2025-10-31T20:52:07.707164Z","level":"INFO","file":"/opt/dynamo/lib/runtime/src/logging.rs","line":806,"target":"dynamo_runtime::logging","message":"OpenTelemetry OTLP export enabled","endpoint":"http://tempo.tm.svc.cluster.local:4317","service":"frontend"}
...
{"time":"2025-10-31T20:52:10.707164Z","level":"DEBUG","file":"/opt/dynamo/lib/runtime/src/pipeline/network/tcp/server.rs","line":230,"target":"dynamo_runtime::pipeline::network::tcp::server","message":"Registering new TcpStream on 10.0.4.65:41959","method":"POST","span_id":"5c20cc08e6afb2b7","span_name":"http-request","trace_id":"b672ccf48683b392891c5cb4163d4b51","uri":"/v1/chat/completions","version":"HTTP/1.1"}
{"time":"2025-10-31T20:52:10.745264Z","level":"DEBUG","file":"/opt/dynamo/lib/llm/src/kv_router/prefill_router.rs","line":232,"target":"dynamo_llm::kv_router::prefill_router","message":"Prefill succeeded, using disaggregated params for decode","method":"POST","span_id":"5c20cc08e6afb2b7","span_name":"http-request","trace_id":"b672ccf48683b392891c5cb4163d4b51","uri":"/v1/chat/completions","version":"HTTP/1.1"}
{"time":"2025-10-31T20:52:10.745545Z","level":"DEBUG","file":"/opt/dynamo/lib/runtime/src/pipeline/network/tcp/server.rs","line":230,"target":"dynamo_runtime::pipeline::network::tcp::server","message":"Registering new TcpStream on 10.0.4.65:41959","method":"POST","span_id":"5c20cc08e6afb2b7","span_name":"http-request","trace_id":"b672ccf48683b392891c5cb4163d4b51","uri":"/v1/chat/completions","version":"HTTP/1.1"}
......
......@@ -446,7 +446,7 @@ counter.inc();
- **Automatic registration**: Metrics created via endpoint's `metrics().create_*()` factory methods are automatically registered with the system
- Automatic labeling with namespace, component, and endpoint information
- Consistent metric naming with `dynamo_` prefix
- Built-in HTTP metrics endpoint when enabled with `DYN_SYSTEM_ENABLED=true`
- Built-in HTTP metrics endpoint when `DYN_SYSTEM_PORT` is set to a positive value
- Hierarchical metric organization
### Advanced Features
......
......@@ -23,7 +23,7 @@ python3 -m dynamo.frontend --http-port 8000 &
DYNAMO_PID=$!
# Run worker
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
DYN_SYSTEM_PORT=8081 \
python3 -m dynamo.trtllm \
--model-path "$MODEL_PATH" \
--served-model-name "$SERVED_MODEL_NAME" \
......
......@@ -9,5 +9,5 @@ python -m dynamo.frontend --http-port=8000 &
# run worker
# --enforce-eager is added for quick deployment. for production use, need to remove this flag
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 \
DYN_SYSTEM_PORT=8081 \
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --enforce-eager --connector none
......@@ -379,19 +379,18 @@ maturin develop
```bash
cd ~/dynamo/lib/bindings/python/examples/metrics
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 ./server_with_loop.py
DYN_SYSTEM_PORT=8081 ./server_with_loop.py
```
#### Run Example B: Callback-based Updates
```bash
cd ~/dynamo/lib/bindings/python/examples/metrics
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 ./server_with_callback.py
DYN_SYSTEM_PORT=8081 ./server_with_callback.py
```
**Note:** The environment variables are required:
- `DYN_SYSTEM_ENABLED=true` - Enables the system status server
- `DYN_SYSTEM_PORT=8081` - Sets the port for the metrics endpoint
**Note:** The environment variable is required:
- `DYN_SYSTEM_PORT=8081` - Sets the port for the metrics endpoint (automatically enables the system status server)
#### Check the Metrics
......
......@@ -12,7 +12,7 @@ This shows how Python code can:
4. The metrics are automatically served via the /metrics endpoint
Usage:
DYN_SYSTEM_ENABLED=true DYN_SYSTEM_PORT=8081 ./server_with_callback.py
DYN_SYSTEM_PORT=8081 ./server_with_callback.py
# In another terminal, query the metrics:
curl http://localhost:8081/metrics
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment