Unverified Commit 733c3dce authored by jh-nv's avatar jh-nv Committed by GitHub
Browse files

feat: global router configuration migrate (#6342)

parent 04682cf8
...@@ -62,14 +62,16 @@ python -m dynamo.global_router \ ...@@ -62,14 +62,16 @@ python -m dynamo.global_router \
### Arguments ### Arguments
| Argument | Required | Default | Description | All options can be set via CLI flags or environment variables. CLI flags take precedence over environment variables.
|----------|----------|---------|-------------|
| `--config` | Yes | - | Path to JSON configuration file | | Argument | Required (CLI or env) | Env var | Default | Description |
| `--model-name` | Yes | - | Model name for registration (must match workers) | |----------|----------------------|---------|---------|-------------|
| `--namespace` | No | `DYN_NAMESPACE` env var or "dynamo" | Namespace for global router | | `--config` | Yes | `DYN_GLOBAL_ROUTER_CONFIG` | - | Path to JSON configuration file |
| `--component-name` | No | "global_router" | Component name | | `--model-name` | Yes | `DYN_GLOBAL_ROUTER_MODEL_NAME` | - | Model name for registration (must match workers) |
| `--default-ttft-target` | No | None | Default TTFT target (ms) for prefill pool selection | | `--namespace` | No | `DYN_NAMESPACE` | "dynamo" | Namespace for global router |
| `--default-itl-target` | No | None | Default ITL target (ms) for decode pool selection | | `--component-name` | No | `DYN_GLOBAL_ROUTER_COMPONENT_NAME` | "global_router" | Component name |
| `--default-ttft-target` | No | `DYN_GLOBAL_ROUTER_DEFAULT_TTFT_TARGET` | None | Default TTFT target (ms) for prefill pool selection |
| `--default-itl-target` | No | `DYN_GLOBAL_ROUTER_DEFAULT_ITL_TARGET` | None | Default ITL target (ms) for decode pool selection |
## Configuration ## Configuration
......
...@@ -20,7 +20,6 @@ Key features: ...@@ -20,7 +20,6 @@ Key features:
import argparse import argparse
import asyncio import asyncio
import logging import logging
import os
import uvloop import uvloop
...@@ -28,91 +27,50 @@ from dynamo.llm import ModelInput, ModelType, register_model ...@@ -28,91 +27,50 @@ from dynamo.llm import ModelInput, ModelType, register_model
from dynamo.runtime import DistributedRuntime, dynamo_worker from dynamo.runtime import DistributedRuntime, dynamo_worker
from dynamo.runtime.logging import configure_dynamo_logging from dynamo.runtime.logging import configure_dynamo_logging
from .backend_args import DynamoGlobalRouterArgGroup, DynamoGlobalRouterConfig
from .handler import GlobalRouterHandler from .handler import GlobalRouterHandler
configure_dynamo_logging() configure_dynamo_logging()
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
DYN_NAMESPACE = os.environ.get("DYN_NAMESPACE", "dynamo")
def parse_args() -> DynamoGlobalRouterConfig:
def parse_args():
"""Parse command-line arguments for the Global Router service.""" """Parse command-line arguments for the Global Router service."""
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Dynamo Global Router Service: Hierarchical routing to prefill/decode pools", description="Dynamo Global Router Service: Hierarchical routing to prefill/decode pools",
formatter_class=argparse.RawTextHelpFormatter, formatter_class=argparse.RawTextHelpFormatter,
) )
DynamoGlobalRouterArgGroup().add_arguments(parser)
parser.add_argument( args = parser.parse_args()
"--config", config = DynamoGlobalRouterConfig.from_cli_args(args)
type=str, config.validate()
required=True, return config
help="Path to the JSON configuration file defining pool namespaces and selection strategy",
)
parser.add_argument(
"--model-name",
type=str,
required=True,
help="Model name for registration (must match workers)",
)
parser.add_argument(
"--namespace",
type=str,
default=DYN_NAMESPACE,
help=f"Dynamo namespace for the global router (default: {DYN_NAMESPACE})",
)
parser.add_argument(
"--component-name",
type=str,
default="global_router",
help="Component name for the global router (default: global_router)",
)
parser.add_argument(
"--default-ttft-target",
type=float,
default=None,
help="Default TTFT target (ms) for prefill pool selection when SLA not present in request",
)
parser.add_argument(
"--default-itl-target",
type=float,
default=None,
help="Default ITL target (ms) for decode pool selection when SLA not present in request",
)
return parser.parse_args()
@dynamo_worker() @dynamo_worker()
async def worker(runtime: DistributedRuntime): async def worker(runtime: DistributedRuntime):
"""Main worker function for the Global Router service.""" """Main worker function for the Global Router service."""
args = parse_args() config = parse_args()
logger.info("Starting Global Router Service") logger.info("Starting Global Router Service")
logger.info(f"Config: {args.config}") logger.info(f"Config: {config.config_path}")
logger.info(f"Model name: {args.model_name}") logger.info(f"Model name: {config.model_name}")
logger.info(f"Namespace: {args.namespace}") logger.info(f"Namespace: {config.namespace}")
# Create handler # Create handler
handler = GlobalRouterHandler( handler = GlobalRouterHandler(
runtime=runtime, runtime=runtime,
config_path=args.config, config_path=config.config_path,
model_name=args.model_name, model_name=config.model_name,
default_ttft_target=args.default_ttft_target, default_ttft_target=config.default_ttft_target,
default_itl_target=args.default_itl_target, default_itl_target=config.default_itl_target,
) )
# Initialize connections to local routers # Initialize connections to local routers
await handler.initialize() await handler.initialize()
# Create component in the global router namespace # Create component in the global router namespace
component = runtime.namespace(args.namespace).component(args.component_name) component = runtime.namespace(config.namespace).component(config.component_name)
# Create endpoints for prefill and decode # Create endpoints for prefill and decode
# Note: We use separate endpoints so we can register them with different ModelTypes # Note: We use separate endpoints so we can register them with different ModelTypes
...@@ -126,11 +84,11 @@ async def worker(runtime: DistributedRuntime): ...@@ -126,11 +84,11 @@ async def worker(runtime: DistributedRuntime):
model_input=ModelInput.Tokens, model_input=ModelInput.Tokens,
model_type=ModelType.Prefill, model_type=ModelType.Prefill,
endpoint=prefill_endpoint, endpoint=prefill_endpoint,
model_path=args.model_name, model_path=config.model_name,
model_name=args.model_name, model_name=config.model_name,
) )
logger.info( logger.info(
f"Registered prefill endpoint: {args.namespace}.{args.component_name}.prefill_generate" f"Registered prefill endpoint: {config.namespace}.{config.component_name}.prefill_generate"
) )
logger.info("Registering as decode worker...") logger.info("Registering as decode worker...")
...@@ -139,11 +97,11 @@ async def worker(runtime: DistributedRuntime): ...@@ -139,11 +97,11 @@ async def worker(runtime: DistributedRuntime):
model_input=ModelInput.Tokens, model_input=ModelInput.Tokens,
model_type=ModelType.Chat | ModelType.Completions, model_type=ModelType.Chat | ModelType.Completions,
endpoint=decode_endpoint, endpoint=decode_endpoint,
model_path=args.model_name, model_path=config.model_name,
model_name=args.model_name, model_name=config.model_name,
) )
logger.info( logger.info(
f"Registered decode endpoint: {args.namespace}.{args.component_name}.decode_generate" f"Registered decode endpoint: {config.namespace}.{config.component_name}.decode_generate"
) )
logger.info("Global Router ready - serving endpoints...") logger.info("Global Router ready - serving endpoints...")
......
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Dynamo Global Router configuration ArgGroup."""
from typing import Optional
from dynamo.common.configuration.arg_group import ArgGroup
from dynamo.common.configuration.config_base import ConfigBase
from dynamo.common.configuration.utils import add_argument
class DynamoGlobalRouterArgGroup(ArgGroup):
    """Argument group declaring the Global Router's command-line options.

    Every option is registered through the shared ``add_argument`` helper
    together with the name of an environment variable.
    """

    def add_arguments(self, parser) -> None:
        """Register all Global Router options on ``parser``.

        The options are placed in an argument group titled
        "Dynamo Global Router Options".

        Args:
            parser: Parser object exposing ``add_argument_group``.
        """
        group = parser.add_argument_group("Dynamo Global Router Options")

        # One entry per option; each mapping is forwarded verbatim as keyword
        # arguments to ``add_argument``, in declaration order.
        option_specs = (
            {
                "flag_name": "--config",
                "env_var": "DYN_GLOBAL_ROUTER_CONFIG",
                "default": None,
                "help": "Path to the JSON configuration file defining pool namespaces and selection strategy. Must be set via CLI or env.",
                # Stored as ``config_path`` rather than the flag-derived name.
                "dest": "config_path",
            },
            {
                "flag_name": "--model-name",
                "env_var": "DYN_GLOBAL_ROUTER_MODEL_NAME",
                "default": None,
                "help": "Model name for registration (must match workers). Must be set via CLI or env.",
            },
            {
                "flag_name": "--namespace",
                "env_var": "DYN_NAMESPACE",
                "default": "dynamo",
                "help": "Dynamo namespace for the global router.",
            },
            {
                "flag_name": "--component-name",
                "env_var": "DYN_GLOBAL_ROUTER_COMPONENT_NAME",
                "default": "global_router",
                "help": "Component name for the global router.",
            },
            {
                "flag_name": "--default-ttft-target",
                "env_var": "DYN_GLOBAL_ROUTER_DEFAULT_TTFT_TARGET",
                "default": None,
                "help": "Default TTFT target (ms) for prefill pool selection when SLA not present in request.",
                "arg_type": float,
            },
            {
                "flag_name": "--default-itl-target",
                "env_var": "DYN_GLOBAL_ROUTER_DEFAULT_ITL_TARGET",
                "default": None,
                "help": "Default ITL target (ms) for decode pool selection when SLA not present in request.",
                "arg_type": float,
            },
        )

        for spec in option_specs:
            add_argument(group, **spec)
class DynamoGlobalRouterConfig(ConfigBase):
    """Settings container for the Dynamo Global Router.

    The attribute names mirror the destinations of the Global Router
    command-line options.
    """

    config_path: Optional[str] = None
    model_name: Optional[str] = None
    namespace: str
    component_name: str
    default_ttft_target: Optional[float] = None
    default_itl_target: Optional[float] = None

    def validate(self) -> None:
        """Ensure the mandatory settings carry a non-blank value.

        ``config_path`` is checked first, then ``model_name``.

        Raises:
            ValueError: If either setting is ``None``, empty, or consists
                only of whitespace.
        """
        # (attribute name, error message) pairs, in checking order.
        mandatory = (
            (
                "config_path",
                "config_path must be set via --config or DYN_GLOBAL_ROUTER_CONFIG",
            ),
            (
                "model_name",
                "model_name must be set via --model-name or DYN_GLOBAL_ROUTER_MODEL_NAME",
            ),
        )
        for attr_name, message in mandatory:
            value = getattr(self, attr_name)
            # Falsy values (None / "") short-circuit before .strip() is called.
            if not (value and value.strip()):
                raise ValueError(message)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment