Unverified commit 4bce7f75, authored by Keiven C, committed by GitHub
Browse files

feat: add prefill worker metrics support for vLLM (#3949)


Signed-off-by: Keiven Chang <keivenchang@users.noreply.github.com>
parent 18ade7d8
...@@ -12,6 +12,7 @@ from vllm.inputs import TokensPrompt ...@@ -12,6 +12,7 @@ from vllm.inputs import TokensPrompt
from vllm.sampling_params import SamplingParams from vllm.sampling_params import SamplingParams
from vllm.v1.engine.exceptions import EngineDeadError from vllm.v1.engine.exceptions import EngineDeadError
from dynamo.llm import ZmqKvEventPublisher
from dynamo.runtime.logging import configure_dynamo_logging from dynamo.runtime.logging import configure_dynamo_logging
from .engine_monitor import VllmEngineMonitor from .engine_monitor import VllmEngineMonitor
...@@ -62,7 +63,7 @@ class BaseWorkerHandler(ABC): ...@@ -62,7 +63,7 @@ class BaseWorkerHandler(ABC):
self.component = component self.component = component
self.engine_client = engine self.engine_client = engine
self.default_sampling_params = default_sampling_params self.default_sampling_params = default_sampling_params
self.kv_publishers = None self.kv_publishers: list[ZmqKvEventPublisher] | None = None
self.engine_monitor = VllmEngineMonitor(runtime, engine) self.engine_monitor = VllmEngineMonitor(runtime, engine)
@abstractmethod @abstractmethod
......
...@@ -274,6 +274,11 @@ async def init_prefill(runtime: DistributedRuntime, config: Config): ...@@ -274,6 +274,11 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
if kv_publishers: if kv_publishers:
handler.kv_publishers = kv_publishers handler.kv_publishers = kv_publishers
if config.engine_args.disable_log_stats is False:
register_engine_metrics_callback(
endpoint=generate_endpoint, registry=REGISTRY, metric_prefix_filter="vllm:"
)
# Register prefill model with ModelType.Prefill # Register prefill model with ModelType.Prefill
if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register if not config.engine_args.data_parallel_rank: # if rank is 0 or None then register
await register_vllm_model( await register_vllm_model(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment