"...ssh:/git@developer.sourcefind.cn:2222/OpenDAS/dynamo.git" did not exist on "4000097653a9d6d345be96ff1466532a3a30f6f5"
Unverified Commit c6555852 authored by Keiven C's avatar Keiven C Committed by GitHub
Browse files

fix: enable LMCache metrics visibility with PROMETHEUS_MULTIPROC_DIR (#4654)


Signed-off-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
Co-authored-by: default avatarKeiven Chang <keivenchang@users.noreply.github.com>
parent a6a4f360
...@@ -9,6 +9,7 @@ import tempfile ...@@ -9,6 +9,7 @@ import tempfile
from typing import Optional from typing import Optional
import uvloop import uvloop
from prometheus_client import REGISTRY, CollectorRegistry, multiprocess
from vllm.distributed.kv_events import ZmqEventPublisher from vllm.distributed.kv_events import ZmqEventPublisher
from vllm.usage.usage_lib import UsageContext from vllm.usage.usage_lib import UsageContext
from vllm.v1.engine.async_llm import AsyncLLM from vllm.v1.engine.async_llm import AsyncLLM
...@@ -16,6 +17,7 @@ from vllm.v1.metrics.prometheus import setup_multiprocess_prometheus ...@@ -16,6 +17,7 @@ from vllm.v1.metrics.prometheus import setup_multiprocess_prometheus
from dynamo.common.config_dump import dump_config from dynamo.common.config_dump import dump_config
from dynamo.common.utils.endpoint_types import parse_endpoint_types from dynamo.common.utils.endpoint_types import parse_endpoint_types
from dynamo.common.utils.prometheus import register_engine_metrics_callback
from dynamo.llm import ( from dynamo.llm import (
ModelInput, ModelInput,
ModelRuntimeConfig, ModelRuntimeConfig,
...@@ -106,6 +108,64 @@ async def worker(): ...@@ -106,6 +108,64 @@ async def worker():
logger.debug("Worker function completed, exiting...") logger.debug("Worker function completed, exiting...")
def setup_metrics_collection(config: Config, generate_endpoint, logger):
    """Register engine metrics callbacks for vLLM and LMCache metrics.

    Nothing is registered unless ``config.engine_args.disable_log_stats`` is
    exactly ``False``.

    In multiprocess mode (``PROMETHEUS_MULTIPROC_DIR`` set), metrics are stored:
      1. In-memory: metric objects in the global ``REGISTRY``.
      2. On-disk: metric values in ``.db`` files under ``PROMETHEUS_MULTIPROC_DIR``.

    ``MultiProcessCollector`` reads from the ``.db`` files, but adding it to
    ``REGISTRY`` can fail with "Duplicated timeseries" when
    ``PROMETHEUS_MULTIPROC_DIR`` was set before the process started (e.g. K8s
    deployments), because the same metrics are already present in ``REGISTRY``.

    Strategy: try to attach ``MultiProcessCollector`` to ``REGISTRY``. If that
    raises ``ValueError``, attach it to a separate ``CollectorRegistry`` instead
    and register a callback for each registry so that vllm, lmcache and
    dynamo_component metrics are all collected.

    Args:
        config: Worker configuration; only ``engine_args.disable_log_stats``
            is read here.
        generate_endpoint: Endpoint handed to
            ``register_engine_metrics_callback`` as ``endpoint``.
        logger: Logger used for debug messages about which registry is used.

    Raises:
        ValueError: If building the fallback ``MultiProcessCollector`` fails,
            or if callback registration itself raises it. Only the failure to
            attach the collector to ``REGISTRY`` is handled.
    """
    # Identity comparison kept on purpose: any value other than the literal
    # False (including None) leaves metrics collection disabled.
    if config.engine_args.disable_log_stats is not False:
        return

    if not os.environ.get("PROMETHEUS_MULTIPROC_DIR"):
        # No multiprocess mode
        register_engine_metrics_callback(
            endpoint=generate_endpoint,
            registry=REGISTRY,
            metric_prefix_filters=["vllm:", "lmcache:"],
        )
        return

    try:
        # MultiProcessCollector reads metrics from .db files in PROMETHEUS_MULTIPROC_DIR.
        # Adding it to REGISTRY allows collecting both in-memory and .db file metrics.
        # Only this call is guarded, so a ValueError raised by callback
        # registration is never mistaken for a collector conflict.
        multiprocess.MultiProcessCollector(REGISTRY)
    except ValueError as e:
        # Conflict: metrics already in REGISTRY, MultiProcessCollector tries to
        # add the same metrics from .db files.
        # Use a separate registry that ONLY reads from .db files instead.
        logger.debug(
            f"Could not add MultiProcessCollector to REGISTRY ({e}), using separate registry"
        )
        multiproc_registry = CollectorRegistry()
        multiprocess.MultiProcessCollector(multiproc_registry)

        # Register both registries to collect all metrics.
        # Global REGISTRY has in-memory metrics (vllm, dynamo_component).
        register_engine_metrics_callback(
            endpoint=generate_endpoint,
            registry=REGISTRY,
            metric_prefix_filters=["vllm:", "dynamo_component:"],
        )
        # Multiproc registry has .db file metrics (lmcache, possibly vllm duplicates).
        register_engine_metrics_callback(
            endpoint=generate_endpoint,
            registry=multiproc_registry,
            metric_prefix_filters=["vllm:", "lmcache:"],
        )
    else:
        logger.debug("Added MultiProcessCollector to global REGISTRY")
        register_engine_metrics_callback(
            endpoint=generate_endpoint,
            registry=REGISTRY,
            metric_prefix_filters=["vllm:", "lmcache:"],
        )
def setup_kv_event_publisher( def setup_kv_event_publisher(
config: Config, config: Config,
component, component,
...@@ -176,11 +236,9 @@ def setup_kv_event_publisher( ...@@ -176,11 +236,9 @@ def setup_kv_event_publisher(
def setup_vllm_engine(config, stat_logger=None): def setup_vllm_engine(config, stat_logger=None):
# Existing vLLM v0.11.0 bug: vllm/v1/metrics/prometheus.py:79 passes TemporaryDirectory object instead of # vLLM v0.11.0 bug: vllm/v1/metrics/prometheus.py:79 passes TemporaryDirectory object
# the .name string, causing a false error message when vLLM exits. Therefore, always set # instead of .name string, causing false error on exit. Set PROMETHEUS_MULTIPROC_DIR
# PROMETHEUS_MULTIPROC_DIR first, and we'll do the path cleanup. # ourselves to avoid this and handle cleanup properly.
# This vLLM bug causes a false error message when vLLM exits.
prometheus_temp_dir = None prometheus_temp_dir = None
if "PROMETHEUS_MULTIPROC_DIR" not in os.environ: if "PROMETHEUS_MULTIPROC_DIR" not in os.environ:
prometheus_temp_dir = tempfile.TemporaryDirectory(prefix="vllm_prometheus_") prometheus_temp_dir = tempfile.TemporaryDirectory(prefix="vllm_prometheus_")
...@@ -356,31 +414,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config): ...@@ -356,31 +414,7 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
if kv_publishers: if kv_publishers:
handler.kv_publishers = kv_publishers handler.kv_publishers = kv_publishers
if config.engine_args.disable_log_stats is False: setup_metrics_collection(config, generate_endpoint, logger)
# vLLM v1 registers its metrics with 'vllm:' prefix
from prometheus_client import REGISTRY, multiprocess
from dynamo.common.utils.prometheus import register_engine_metrics_callback
# Option 1: Try adding MultiProcessCollector to the global REGISTRY
# This would make REGISTRY collect from both its registered metrics AND multiprocess files
if os.environ.get("PROMETHEUS_MULTIPROC_DIR"):
try:
# Add MultiProcessCollector to global REGISTRY
# This makes REGISTRY collect from .db files in addition to its own metrics
multiprocess.MultiProcessCollector(REGISTRY)
logger.info("Added MultiProcessCollector to global REGISTRY")
except ValueError as e:
# Might already be registered or directory issues
logger.warning(f"Could not add MultiProcessCollector to REGISTRY: {e}")
# Register callback with the global REGISTRY
# Now it should collect both its own metrics AND multiprocess metrics
register_engine_metrics_callback(
endpoint=generate_endpoint,
registry=REGISTRY,
metric_prefix_filters=["vllm:", "lmcache:"],
)
# Register prefill model with ModelType.Prefill # Register prefill model with ModelType.Prefill
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
...@@ -493,31 +527,7 @@ async def init(runtime: DistributedRuntime, config: Config): ...@@ -493,31 +527,7 @@ async def init(runtime: DistributedRuntime, config: Config):
if kv_publishers: if kv_publishers:
handler.kv_publishers = kv_publishers handler.kv_publishers = kv_publishers
if config.engine_args.disable_log_stats is False: setup_metrics_collection(config, generate_endpoint, logger)
# vLLM v1 registers its metrics with 'vllm:' prefix
from prometheus_client import REGISTRY, multiprocess
from dynamo.common.utils.prometheus import register_engine_metrics_callback
# Option 1: Try adding MultiProcessCollector to the global REGISTRY
# This would make REGISTRY collect from both its registered metrics AND multiprocess files
if os.environ.get("PROMETHEUS_MULTIPROC_DIR"):
try:
# Add MultiProcessCollector to global REGISTRY
# This makes REGISTRY collect from .db files in addition to its own metrics
multiprocess.MultiProcessCollector(REGISTRY)
logger.info("Added MultiProcessCollector to global REGISTRY")
except ValueError as e:
# Might already be registered or directory issues
logger.warning(f"Could not add MultiProcessCollector to REGISTRY: {e}")
# Register callback with the global REGISTRY
# Now it should collect both its own metrics AND multiprocess metrics
register_engine_metrics_callback(
endpoint=generate_endpoint,
registry=REGISTRY,
metric_prefix_filters=["vllm:", "lmcache:"],
)
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
# Parse endpoint types from --dyn-endpoint-types flag # Parse endpoint types from --dyn-endpoint-types flag
......
...@@ -156,6 +156,7 @@ When LMCache is enabled with `--connector lmcache` and `DYN_SYSTEM_PORT` is set, ...@@ -156,6 +156,7 @@ When LMCache is enabled with `--connector lmcache` and `DYN_SYSTEM_PORT` is set,
**Requirements to access LMCache metrics:** **Requirements to access LMCache metrics:**
- `--connector lmcache` - Enables LMCache - `--connector lmcache` - Enables LMCache
- `DYN_SYSTEM_PORT=8081` - Enables metrics HTTP endpoint - `DYN_SYSTEM_PORT=8081` - Enables metrics HTTP endpoint
- `PROMETHEUS_MULTIPROC_DIR` (optional) - If not set, Dynamo manages it internally. Only set explicitly if you need control over the metrics directory.
For detailed information on LMCache metrics, including the complete list of available metrics and how to access them, see the **[LMCache Metrics section](prometheus.md#lmcache-metrics)** in the vLLM Prometheus Metrics Guide. For detailed information on LMCache metrics, including the complete list of available metrics and how to access them, see the **[LMCache Metrics section](prometheus.md#lmcache-metrics)** in the vLLM Prometheus Metrics Guide.
......
...@@ -136,7 +136,7 @@ curl -s localhost:8081/metrics | grep "^lmcache:" ...@@ -136,7 +136,7 @@ curl -s localhost:8081/metrics | grep "^lmcache:"
## Implementation Details ## Implementation Details
- vLLM v1 uses multiprocess metrics collection via `prometheus_client.multiprocess` - vLLM v1 uses multiprocess metrics collection via `prometheus_client.multiprocess`
- `PROMETHEUS_MULTIPROC_DIR`: vLLM sets this environment variable to a temporary directory where multiprocess metrics are stored as memory-mapped files. Each worker process writes its metrics to separate files in this directory, which are aggregated when `/metrics` is scraped. - `PROMETHEUS_MULTIPROC_DIR`: (optional). By default, Dynamo automatically manages this environment variable, setting it to a temporary directory where multiprocess metrics are stored as memory-mapped files. Each worker process writes its metrics to separate files in this directory, which are aggregated when `/metrics` is scraped. Users only need to set this explicitly where complete control over the metrics directory is required.
- Dynamo uses `MultiProcessCollector` to aggregate metrics from all worker processes - Dynamo uses `MultiProcessCollector` to aggregate metrics from all worker processes
- Metrics are filtered by the `vllm:` and `lmcache:` prefixes before being exposed (when LMCache is enabled) - Metrics are filtered by the `vllm:` and `lmcache:` prefixes before being exposed (when LMCache is enabled)
- The integration uses Dynamo's `register_engine_metrics_callback()` function with the global `REGISTRY` - The integration uses Dynamo's `register_engine_metrics_callback()` function with the global `REGISTRY`
......
...@@ -4,10 +4,13 @@ ...@@ -4,10 +4,13 @@
set -e set -e
trap 'echo Cleaning up...; kill 0' EXIT trap 'echo Cleaning up...; kill 0' EXIT
# Explicitly unset PROMETHEUS_MULTIPROC_DIR to let LMCache or Dynamo manage it internally
unset PROMETHEUS_MULTIPROC_DIR
# run ingress # run ingress
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000) # dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python -m dynamo.frontend & python -m dynamo.frontend &
# run worker with LMCache enabled # run worker with LMCache enabled (without PROMETHEUS_MULTIPROC_DIR set externally)
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \ DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --connector lmcache python -m dynamo.vllm --model Qwen/Qwen3-0.6B --connector lmcache
#!/bin/bash
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Launches the Dynamo frontend and one vLLM worker with LMCache enabled while
# PROMETHEUS_MULTIPROC_DIR is set before the worker process starts.
# Abort the script as soon as any command fails.
set -e
# Explicitly set PROMETHEUS_MULTIPROC_DIR (K8s-style deployment)
# Use unique directory per test run to avoid conflicts
# A value already present in the environment is kept; otherwise a path built
# from the shell PID ($$) and $RANDOM is used.
export PROMETHEUS_MULTIPROC_DIR=${PROMETHEUS_MULTIPROC_DIR:-/tmp/prometheus_multiproc_$$_$RANDOM}
# Start from an empty directory.
# NOTE: this deletes whatever already exists at that path, including a
# directory supplied by the caller.
rm -rf "$PROMETHEUS_MULTIPROC_DIR"
mkdir -p "$PROMETHEUS_MULTIPROC_DIR"
# Cleanup function to remove the directory on exit
# `kill 0` signals every process in the current process group, which stops the
# backgrounded frontend started below.
cleanup() {
echo "Cleaning up..."
rm -rf "$PROMETHEUS_MULTIPROC_DIR"
kill 0
}
# Run cleanup whenever the script exits.
trap cleanup EXIT
# run ingress
# dynamo.frontend accepts either --http-port flag or DYN_HTTP_PORT env var (defaults to 8000)
python -m dynamo.frontend &
# run worker with LMCache enabled and PROMETHEUS_MULTIPROC_DIR explicitly set
# DYN_SYSTEM_PORT defaults to 8081 when unset. PROMETHEUS_MULTIPROC_DIR is
# already exported above and is passed again here to make the setting explicit.
DYN_SYSTEM_PORT=${DYN_SYSTEM_PORT:-8081} \
PROMETHEUS_MULTIPROC_DIR="$PROMETHEUS_MULTIPROC_DIR" \
python -m dynamo.vllm --model Qwen/Qwen3-0.6B --connector lmcache
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import base64 import base64
import logging import logging
import os import os
import random
from dataclasses import dataclass, field from dataclasses import dataclass, field
import pytest import pytest
...@@ -64,6 +65,22 @@ vllm_configs = { ...@@ -64,6 +65,22 @@ vllm_configs = {
metric_payload_default(min_num_requests=6, backend="lmcache"), metric_payload_default(min_num_requests=6, backend="lmcache"),
], ],
), ),
"aggregated_lmcache_multiproc": VLLMConfig(
name="aggregated_lmcache_multiproc",
directory=vllm_dir,
script_name="agg_lmcache_multiproc.sh",
marks=[pytest.mark.gpu_1],
model="Qwen/Qwen3-0.6B",
env={
"PROMETHEUS_MULTIPROC_DIR": f"/tmp/prometheus_multiproc_test_{os.getpid()}_{random.randint(0, 10000)}"
},
request_payloads=[
chat_payload_default(),
completion_payload_default(),
metric_payload_default(min_num_requests=6, backend="vllm"),
metric_payload_default(min_num_requests=6, backend="lmcache"),
],
),
"agg-request-plane-tcp": VLLMConfig( "agg-request-plane-tcp": VLLMConfig(
name="agg-request-plane-tcp", name="agg-request-plane-tcp",
directory=vllm_dir, directory=vllm_dir,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment