Unverified Commit 69797b5a authored by Graham King's avatar Graham King Committed by GitHub
Browse files

feat: Only monitor NATS metrics if using NATS request plane (#4442)


Signed-off-by: Graham King <grahamk@nvidia.com>
parent a8e5328e
......@@ -40,7 +40,6 @@ async def init_planner(runtime: DistributedRuntime, args):
await start_sla_planner(runtime, args)
component = runtime.namespace(args.namespace).component("Planner")
await component.create_service()
async def generate(request: RequestType):
"""Dummy endpoint to satisfy that each component has an endpoint"""
......
......@@ -260,7 +260,6 @@ async def worker(runtime: DistributedRuntime):
# Create service component - use "router" as component name
component = runtime.namespace(namespace).component("router")
await component.create_service()
# Create handler
handler = StandaloneRouterHandler(
......
......@@ -110,7 +110,6 @@ async def init(runtime: DistributedRuntime, config: Config):
component = runtime.namespace(dynamo_args.namespace).component(
dynamo_args.component
)
await component.create_service()
generate_endpoint = component.endpoint(dynamo_args.endpoint)
......@@ -197,7 +196,6 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
component = runtime.namespace(dynamo_args.namespace).component(
dynamo_args.component
)
await component.create_service()
generate_endpoint = component.endpoint(dynamo_args.endpoint)
......@@ -257,7 +255,6 @@ async def init_embedding(runtime: DistributedRuntime, config: Config):
component = runtime.namespace(dynamo_args.namespace).component(
dynamo_args.component
)
await component.create_service()
generate_endpoint = component.endpoint(dynamo_args.endpoint)
......@@ -315,7 +312,6 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
component = runtime.namespace(dynamo_args.namespace).component(
dynamo_args.component
)
await component.create_service()
generate_endpoint = component.endpoint(dynamo_args.endpoint)
......@@ -364,7 +360,6 @@ async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Con
component = runtime.namespace(dynamo_args.namespace).component(
dynamo_args.component
)
await component.create_service()
generate_endpoint = component.endpoint(dynamo_args.endpoint)
......@@ -405,7 +400,6 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
component = runtime.namespace(dynamo_args.namespace).component(
dynamo_args.component
)
await component.create_service()
generate_endpoint = component.endpoint(dynamo_args.endpoint)
......@@ -447,7 +441,6 @@ async def init_multimodal_prefill_worker(runtime: DistributedRuntime, config: Co
component = runtime.namespace(dynamo_args.namespace).component(
dynamo_args.component
)
await component.create_service()
generate_endpoint = component.endpoint(dynamo_args.endpoint)
......
......@@ -143,7 +143,6 @@ async def init(runtime: DistributedRuntime, config: Config):
)
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
# Convert model path to Path object if it's a local path, otherwise keep as string
model_path = str(config.model_path)
......
......@@ -326,7 +326,6 @@ async def init_prefill(runtime: DistributedRuntime, config: Config):
Instantiate and serve
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
clear_endpoint = component.endpoint("clear_kv_blocks")
......@@ -427,7 +426,6 @@ async def init(runtime: DistributedRuntime, config: Config):
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
clear_endpoint = component.endpoint("clear_kv_blocks")
......@@ -558,7 +556,6 @@ def get_engine_cache_info(engine: AsyncLLM):
async def init_multimodal_processor(runtime: DistributedRuntime, config: Config):
"""Initialize multimodal processor component"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
......@@ -610,7 +607,6 @@ async def init_multimodal_processor(runtime: DistributedRuntime, config: Config)
async def init_multimodal_encode_worker(runtime: DistributedRuntime, config: Config):
"""Initialize multimodal encode worker component"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
......@@ -657,7 +653,6 @@ async def init_multimodal_worker(runtime: DistributedRuntime, config: Config):
Both can operate in aggregated (P+D) or disaggregated (P→D) mode.
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
clear_endpoint = component.endpoint("clear_kv_blocks")
......
......@@ -53,7 +53,7 @@ The hierarchy and naming in etcd and NATS may change over time, and this documen
For etcd, it also creates a primary lease and spins up a background task to keep the lease alive. All objects registered under this `DistributedRuntime` use this lease_id to maintain their life cycle. There is also a cancellation token that is tied to the primary lease. When the cancellation token is triggered or the background task fails, the primary lease is revoked or expires, and the kv pairs stored with this lease_id are removed.
- `Namespace`: `Namespace`s are primarily a logical grouping mechanism and are not registered in etcd. A `Namespace` provides the root path for all components under it.
- `Component`: When a `Component` object is created, similar to `Namespace`, it isn't registered in etcd. When `create_service` is called, it creates a NATS service group using `{namespace_name}.{service_name}` and registers a service in the registry of the `Component`, where the registry is an internal data structure that tracks all services and endpoints within the `DistributedRuntime`.
- `Component`: When a `Component` object is created, similar to `Namespace`, it isn't registered in etcd. When `create_service` is called, it creates a NATS service group using `{namespace_name}.{service_name}` for metrics and registers a service in the registry of the `Component`, where the registry is an internal data structure that tracks all services and endpoints within the `DistributedRuntime`.
- `Endpoint`: When an Endpoint object is created and started, it performs two key registrations:
- NATS Registration: The endpoint is registered with the NATS service group created during service creation. The endpoint is assigned a unique subject following the naming: `{namespace_name}.{service_name}.{endpoint_name}-{lease_id_hex}`.
- etcd Registration: The endpoint information is stored in etcd at a path following the naming: `/services/{namespace}/{component}/{endpoint}-{lease_id}`. Note that the endpoints of different workers of the same type (e.g., two `VllmPrefillWorker`s in one deployment) share the same `Namespace`, `Component`, and `Endpoint` name. They are distinguished by the primary `lease_id` of their respective `DistributedRuntime`s.
......
......@@ -27,7 +27,6 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker
# 2. Register ourselves on the network
#
component = runtime.namespace("namespace").component("component")
await component.create_service()
model_path = "Qwen/Qwen3-0.6B" # or "/data/models/Qwen3-0.6B"
model_input = ModelInput.Tokens # or ModelInput.Text if engine handles pre-processing
model_type = ModelType.Chat # or ModelType.Chat | ModelType.Completions if model can be deployed on chat and completions endpoints
......
......@@ -339,7 +339,6 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker
# 2. Register ourselves on the network
#
component = runtime.namespace("namespace").component("component")
await component.create_service()
model_path = "Qwen/Qwen3-0.6B" # or "/data/models/Qwen3-0.6B"
model_input = ModelInput.Tokens # or ModelInput.Text if engine handles pre-processing
model_type = ModelType.Chat # or ModelType.Chat | ModelType.Completions if model can be deployed on chat and completions endpoints
......
......@@ -58,7 +58,6 @@ async def main():
# Create middle server component
component = runtime.namespace("demo").component("middle")
await component.create_service()
endpoint = component.endpoint("generate")
......
......@@ -35,7 +35,6 @@ async def main():
# Create server component
component = runtime.namespace("demo").component("server")
await component.create_service()
endpoint = component.endpoint("generate")
handler = DemoServer()
......
......@@ -28,7 +28,6 @@ async def worker(runtime: DistributedRuntime):
endpoint_name = "generate"
component = runtime.namespace(namespace_name).component(component_name)
await component.create_service()
logger.info(f"Created service {namespace_name}/{component_name}")
......
......@@ -93,7 +93,6 @@ async def worker(runtime: DistributedRuntime):
namespace, comp_name, endpoint_name = parts
component = runtime.namespace(namespace).component(comp_name)
await component.create_service()
stats_endpoint = component.endpoint(endpoint_name)
print(
......
......@@ -269,7 +269,6 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
......
......@@ -226,7 +226,6 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
......
......@@ -303,7 +303,6 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
......
......@@ -273,7 +273,6 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
......
......@@ -437,7 +437,6 @@ async def init(runtime: DistributedRuntime, args: argparse.Namespace, config: Co
"""
component = runtime.namespace(config.namespace).component(config.component)
await component.create_service()
generate_endpoint = component.endpoint(config.endpoint)
clear_endpoint = component.endpoint("clear_kv_blocks")
......
......@@ -81,7 +81,12 @@ pub async fn run(
let request_plane: RequestPlaneMode = flags.request_plane.parse()?;
let dst_config = DistributedConfig {
store_backend: selected_store,
nats_config: nats::ClientOptions::default(),
// We only need NATS here to monitor its metrics, so only if it's our request plane.
nats_config: if request_plane.is_nats() {
Some(nats::ClientOptions::default())
} else {
None
},
request_plane,
};
let distributed_runtime = DistributedRuntime::new(runtime.clone(), dst_config).await?;
......
......@@ -32,7 +32,6 @@ class RequestHandler:
@dynamo_worker()
async def worker(runtime: DistributedRuntime):
component = runtime.namespace("examples/bls").component("bar")
await component.create_service()
endpoint = component.endpoint("generate")
await endpoint.serve_endpoint(RequestHandler().generate)
......
......@@ -31,7 +31,6 @@ class RequestHandler:
@dynamo_worker()
async def worker(runtime: DistributedRuntime):
component = runtime.namespace("examples/bls").component("foo")
await component.create_service()
endpoint = component.endpoint("generate")
await endpoint.serve_endpoint(RequestHandler().generate)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment