help="Full endpoint path for workers in the format namespace.component.endpoint (e.g., dynamo.prefill.generate for prefill workers)",
arg_type=str,
)
add_argument(
g,
flag_name="--router-block-size",
env_var="DYN_ROUTER_BLOCK_SIZE",
default=128,
help="KV cache block size for routing decisions",
arg_type=int,
obsolete_flag="--block-size",
)
add_argument(
g,
flag_name="--router-kv-overlap-score-weight",
env_var="DYN_ROUTER_KV_OVERLAP_SCORE_WEIGHT",
default=1.0,
help="KV Router: Weight for overlap score in worker selection. Higher values prioritize KV cache reuse",
arg_type=float,
obsolete_flag="--kv-overlap-score-weight",
)
add_argument(
g,
flag_name="--router-temperature",
env_var="DYN_ROUTER_TEMPERATURE",
default=0.0,
help="KV Router: Temperature for worker sampling via softmax. Higher values promote more randomness, and 0 fallbacks to deterministic.",
arg_type=float,
)
add_negatable_bool_argument(
g,
flag_name="--router-kv-events",
env_var="DYN_ROUTER_USE_KV_EVENTS",
default=True,
help="KV Router: Enable KV events from workers. When disabled (--no-router-kv-events), the router predicts cache state based on routing decisions with TTL-based expiration and pruning, rather than receiving events from workers.",
dest="router_use_kv_events",
obsolete_flag="--kv-events",
)
add_negatable_bool_argument(
g,
flag_name="--router-replica-sync",
env_var="DYN_ROUTER_REPLICA_SYNC",
default=False,
help="KV Router: Enable replica synchronization across multiple router instances. When true, routers will publish and subscribe to events to maintain consistent state.",
)
add_argument(
g,
flag_name="--router-snapshot-threshold",
env_var="DYN_ROUTER_SNAPSHOT_THRESHOLD",
default=1000000,
help="KV Router: Number of messages in stream before triggering a snapshot",
arg_type=int,
)
add_negatable_bool_argument(
g,
flag_name="--router-reset-states",
env_var="DYN_ROUTER_RESET_STATES",
default=False,
help="KV Router: Reset router state on startup, purging stream and object store. WARNING: Can affect existing router replicas.",
)
add_negatable_bool_argument(
g,
flag_name="--router-durable-kv-events",
env_var="DYN_ROUTER_DURABLE_KV_EVENTS",
default=False,
help="[Deprecated] KV Router: Enable durable KV events using NATS JetStream. This option will be removed in a future release. The event-plane subscriber (local_indexer mode) is now the recommended path.",
obsolete_flag="--durable-kv-events",
)
add_negatable_bool_argument(
g,
flag_name="--router-track-active-blocks",
env_var="DYN_ROUTER_TRACK_ACTIVE_BLOCKS",
default=True,
help="KV Router: Track active blocks for load balancing. Use --no-router-track-active-blocks to disable",
obsolete_flag="--track-active-blocks",
)
add_negatable_bool_argument(
g,
flag_name="--router-assume-kv-reuse",
env_var="DYN_ROUTER_ASSUME_KV_REUSE",
default=True,
help="KV Router: When tracking active blocks, assume KV cache reuse. Use --no-router-assume-kv-reuse to use random hashes, useful when KV cache reuse is not expected.",
obsolete_flag="--assume-kv-reuse",
)
add_negatable_bool_argument(
g,
flag_name="--router-track-output-blocks",
env_var="DYN_ROUTER_TRACK_OUTPUT_BLOCKS",
default=False,
help="KV Router: Track output blocks during generation. When enabled, the router adds placeholder blocks as tokens are generated and applies fractional decay based on progress toward expected output sequence length (agent_hints.osl in nvext).",
obsolete_flag="--track-output-blocks",
)
add_argument(
g,
flag_name="--router-ttl-secs",
env_var="DYN_ROUTER_TTL_SECS",
default=120.0,
help="KV Router: TTL for blocks in seconds. Only used when --no-router-kv-events is set. Controls how long cached blocks are considered valid without explicit events.",
arg_type=float,
)
add_argument(
g,
flag_name="--router-max-tree-size",
env_var="DYN_ROUTER_MAX_TREE_SIZE",
default=2**20,
help="KV Router: Maximum tree size before pruning. Only used when --no-router-kv-events is set. When the indexer tree exceeds this size, pruning is triggered.",
arg_type=int,
)
add_argument(
g,
flag_name="--router-prune-target-ratio",
env_var="DYN_ROUTER_PRUNE_TARGET_RATIO",
default=0.8,
help="KV Router: Target size ratio after pruning (0.0-1.0). Only used when --no-router-kv-events is set. Determines how aggressively to prune the tree.",
arg_type=float,
)
add_argument(
g,
flag_name="--router-event-threads",
env_var="DYN_ROUTER_EVENT_THREADS",
default=4,
help="KV Router: Number of event processing threads. >1 uses concurrent radix tree and thread pool for higher throughput. Ignored when --no-router-kv-events is set (approximate mode always uses single-threaded indexer with TTL/pruning).",