fix: migrate example and document to the latest endpoint API (#6542)

42cde264 · jh-nv · GitHub · 222c2e85 · 42cde264 · 42cde264
Unverified Commit 42cde264 authored Feb 25, 2026 by jh-nv Committed by GitHub Feb 25, 2026
5 changed files
--- a/docs/pages/components/router/router-examples.md
+++ b/docs/pages/components/router/router-examples.md
@@ -58,9 +58,7 @@ from dynamo.llm import DistributedRuntime, KvRouter, KvRouterConfig
 async def main():
    # Get runtime and create endpoint
    runtime = DistributedRuntime.detached()
-    namespace = runtime.namespace("dynamo")
-    component = namespace.component("backend")
-    endpoint = component.endpoint("generate")
+    endpoint = runtime.endpoint("dynamo.backend.generate")

    # Create KV router
    kv_router_config = KvRouterConfig()
@@ -227,9 +225,7 @@ from dynamo.llm import DistributedRuntime, KvRouter, KvRouterConfig
 async def minimize_ttft_routing():
    # Setup router
    runtime = DistributedRuntime.detached()
-    namespace = runtime.namespace("dynamo")
-    component = namespace.component("backend")
-    endpoint = component.endpoint("generate")
+    endpoint = runtime.endpoint("dynamo.backend.generate")

    router = KvRouter(
        endpoint=endpoint,

--- a/docs/pages/components/router/router-guide.md
+++ b/docs/pages/components/router/router-guide.md
@@ -211,7 +211,7 @@ Dynamo supports several routing strategies when sending requests from one compon
 First, we must create a client tied to a component's endpoint; we can do this using the labels defined above. Here we are getting a client tied to the `generate` endpoint of the `VllmWorker` component.

 ```python
-client = namespace('dynamo').component('VllmWorker').endpoint('generate').client()
+client = runtime.endpoint("dynamo.VllmWorker.generate").client()
 ```

 We can then use the default routing methods exposed by the client class to send requests to the `VllmWorker` component.
@@ -292,7 +292,7 @@ When both workers are registered, requests are automatically routed.

 ```python
 # Decode worker registration (in your decode worker)
-decode_endpoint = runtime.namespace("dynamo").component("decode").endpoint("generate")
+decode_endpoint = runtime.endpoint("dynamo.decode.generate")

 await register_model(
    model_input=ModelInput.Tokens,
@@ -305,7 +305,7 @@ await register_model(
 await decode_endpoint.serve_endpoint(decode_handler.generate)

 # Prefill worker registration (in your prefill worker)
-prefill_endpoint = runtime.namespace("dynamo").component("prefill").endpoint("generate")
+prefill_endpoint = runtime.endpoint("dynamo.prefill.generate")

 await register_model(
    model_input=ModelInput.Tokens,

--- a/docs/pages/design-docs/distributed-runtime.md
+++ b/docs/pages/design-docs/distributed-runtime.md
@@ -28,7 +28,7 @@ Since these components are deployed in different processes, each has its own `Di
 - Worker components register with names like `backend`, `prefill`, `decode`, or `encoder` depending on their role
 - Workers register endpoints like `generate`, `clear_kv_blocks`, or `load_metrics`

-Their `DistributedRuntime`s are initialized in their respective main functions, their `Namespace`s are configured in the deployment YAML, their `Component`s are created programmatically (e.g., `runtime.namespace("dynamo").component("backend")`), and their `Endpoint`s are created using the `component.endpoint()` method.
+Their `DistributedRuntime`s are initialized in their respective main functions, their `Namespace`s are configured in the deployment YAML, and their `Endpoint`s are obtained by path. In Python, use `runtime.endpoint("namespace.component.endpoint")` (e.g., `runtime.endpoint("dynamo.backend.generate")`).

 ## Initialization


--- a/docs/pages/development/backend-guide.md
+++ b/docs/pages/development/backend-guide.md
@@ -27,11 +27,10 @@ from dynamo.runtime import DistributedRuntime, dynamo_worker

    # 2. Register ourselves on the network
    #
-    component = runtime.namespace("namespace").component("component")
+    endpoint = runtime.endpoint("namespace.component.endpoint")
    model_path = "Qwen/Qwen3-0.6B" # or "/data/models/Qwen3-0.6B"
    model_input = ModelInput.Tokens # or ModelInput.Text if engine handles pre-processing
    model_type = ModelType.Chat # or ModelType.Chat | ModelType.Completions if model can be deployed on chat and completions endpoints
-    endpoint = component.endpoint("endpoint")
    # Optional last param to register_model is model_name. If not present derives it from model_path
    await register_model(model_input, model_type, endpoint, model_path)


--- a/examples/backends/trtllm/mm_router_worker/mm_router_worker.py
+++ b/examples/backends/trtllm/mm_router_worker/mm_router_worker.py
@@ -133,10 +133,8 @@ async def worker(runtime: DistributedRuntime) -> None:
    )

    # Connect to downstream TRT-LLM workers
-    downstream_endpoint = (
-        runtime.namespace(args.namespace)
-        .component(args.downstream_component)
-        .endpoint(args.downstream_endpoint)
+    downstream_endpoint = runtime.endpoint(
+        f"{args.namespace}.{args.downstream_component}.{args.downstream_endpoint}"
    )
    downstream_client = await downstream_endpoint.client()

@@ -175,8 +173,7 @@ async def worker(runtime: DistributedRuntime) -> None:
    )

    # Register this worker's endpoint
-    component = runtime.namespace(args.namespace).component(args.component)
-    endpoint = component.endpoint(args.endpoint)
+    endpoint = runtime.endpoint(f"{args.namespace}.{args.component}.{args.endpoint}")

    # Use ModelInput.Tokens so Frontend preprocesses the request
    # Request format: {token_ids, sampling_options, stop_conditions, extra_args: {messages}}