@@ -15,6 +15,7 @@ class DynamoRuntimeConfig(ConfigBase):
"""Configuration for Dynamo runtime (common across all backends)."""
namespace:str
endpoint:Optional[str]=None
discovery_backend:str
request_plane:str
event_plane:str
...
...
@@ -52,6 +53,13 @@ class DynamoRuntimeArgGroup(ArgGroup):
default="dynamo",
help="Dynamo namespace",
)
add_argument(
g,
flag_name="--endpoint",
env_var="DYN_ENDPOINT",
default=None,
help="Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Example: dyn://dynamo.backend.generate. Currently used only by TRT-LLM and SGLang backends.",
"help":f"Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Example: {DEFAULT_ENDPOINT}",
},
"tool-call-parser":{
"flags":["--dyn-tool-call-parser"],
"type":str,
"default":None,
"choices":get_tool_parser_names(),
"help":"Tool call parser name for the model.",
},
"reasoning-parser":{
"flags":["--dyn-reasoning-parser"],
"type":str,
"default":None,
"choices":get_reasoning_parser_names(),
"help":"Reasoning parser name for the model. If not specified, no reasoning parsing is performed.",
},
"custom-jinja-template":{
"flags":["--custom-jinja-template"],
"type":str,
"default":None,
"help":"Path to a custom Jinja template file to override the model's default chat template. This template will take precedence over any template found in the model repository. This template will be applied by Dynamo's preprocessor and cannot be used with --use-sglang-tokenizer.",
},
"endpoint-types":{
"flags":["--dyn-endpoint-types"],
"type":str,
"default":"chat,completions",
"help":"Comma-separated list of endpoint types to enable. Options: 'chat', 'completions'. Default: 'chat,completions'. Use 'completions' for models without chat templates.",
},
"use-sglang-tokenizer":{
"flags":["--use-sglang-tokenizer"],
"action":"store_true",
"default":False,
"help":"Use SGLang's tokenizer for pre and post processing. This bypasses Dynamo's preprocessor and only v1/chat/completions will be available through the Dynamo frontend. Cannot be used with --custom-jinja-template.",
},
"multimodal-processor":{
"flags":["--multimodal-processor"],
"action":"store_true",
"default":False,
"help":"Run as multimodal processor component for handling multimodal requests",
},
"multimodal-encode-worker":{
"flags":["--multimodal-encode-worker"],
"action":"store_true",
"default":False,
"help":"Run as multimodal encode worker component for processing images/videos",
},
"multimodal-worker":{
"flags":["--multimodal-worker"],
"action":"store_true",
"default":False,
"help":"Run as multimodal worker component for LLM inference with multimodal data",
},
"embedding-worker":{
"flags":["--embedding-worker"],
"action":"store_true",
"default":False,
"help":"Run as embedding worker component (Dynamo flag, also sets SGLang's --is-embedding)",
},
"dump-config-to":{
"flags":["--dump-config-to"],
"type":str,
"default":None,
"help":"Dump debug config to the specified file path. If not specified, the config will be dumped to stdout at INFO level.",
"help":"Discovery backend: kubernetes (K8s API), etcd (distributed KV), file (local filesystem), mem (in-memory). Etcd uses the ETCD_* env vars (e.g. ETCD_ENDPOINTS) for connection details. File uses root dir from env var DYN_FILE_KV or defaults to $TMPDIR/dynamo_store_kv.",
"help":"Enable durable KV events using NATS JetStream instead of the local indexer. By default, local indexer is enabled for lower latency. Use this flag when you need durability and multi-replica router consistency. Requires NATS with JetStream enabled. Can also be set via DYN_DURABLE_KV_EVENTS=true env var.",
},
"image-diffusion-worker":{
"flags":["--image-diffusion-worker"],
"action":"store_true",
"default":False,
"help":"Run as image diffusion worker for image generation",
},
"image-diffusion-fs-url":{
"flags":["--image-diffusion-fs-url"],
"type":str,
"default":None,
"help":"Filesystem URL for storing generated images using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
},
"video-generation-worker":{
"flags":["--video-generation-worker"],
"action":"store_true",
"default":False,
"help":"Run as video generation worker for video generation (T2V/I2V)",
},
"video-generation-fs-url":{
"flags":["--video-generation-fs-url"],
"type":str,
"default":None,
"help":"Filesystem URL for storing generated videos using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
help="Use SGLang's tokenizer for pre and post processing. This bypasses Dynamo's preprocessor and only v1/chat/completions will be available through the Dynamo frontend. Cannot be used with --custom-jinja-template.",
)
# Worker-role switches and their storage/config options, registered on group
# `g` in the order listed. Each row is:
#   (registration helper, flag name, environment variable, default, help text)
# Boolean switches go through add_negatable_bool_argument; options that carry
# a value go through add_argument.
_sgl_flag_specs = (
    (
        add_negatable_bool_argument,
        "--multimodal-processor",
        "DYN_SGL_MULTIMODAL_PROCESSOR",
        False,
        "Run as multimodal processor component for handling multimodal requests.",
    ),
    (
        add_negatable_bool_argument,
        "--multimodal-encode-worker",
        "DYN_SGL_MULTIMODAL_ENCODE_WORKER",
        False,
        "Run as multimodal encode worker component for processing images/videos.",
    ),
    (
        add_negatable_bool_argument,
        "--multimodal-worker",
        "DYN_SGL_MULTIMODAL_WORKER",
        False,
        "Run as multimodal worker component for LLM inference with multimodal data.",
    ),
    (
        add_negatable_bool_argument,
        "--embedding-worker",
        "DYN_SGL_EMBEDDING_WORKER",
        False,
        "Run as embedding worker component (Dynamo flag, also sets SGLang's --is-embedding).",
    ),
    (
        add_negatable_bool_argument,
        "--image-diffusion-worker",
        "DYN_SGL_IMAGE_DIFFUSION_WORKER",
        False,
        "Run as image diffusion worker for image generation.",
    ),
    (
        add_argument,
        "--image-diffusion-fs-url",
        "DYN_SGL_IMAGE_DIFFUSION_FS_URL",
        None,
        "Filesystem URL for storing generated images using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
    ),
    (
        add_argument,
        "--image-diffusion-base-url",
        "DYN_SGL_IMAGE_DIFFUSION_BASE_URL",
        "http://localhost:8008/",
        "Base URL for rewriting image URLs in responses (e.g., http://localhost:8008/). When set, generated image URLs will use this base instead of filesystem URLs.",
    ),
    (
        add_argument,
        "--disagg-config",
        "DYN_SGL_DISAGG_CONFIG",
        None,
        "Disaggregation configuration file in YAML format.",
    ),
    (
        add_argument,
        "--disagg-config-key",
        "DYN_SGL_DISAGG_CONFIG_KEY",
        None,
        "Key to select from nested disaggregation configuration file (e.g., 'prefill', 'decode').",
    ),
    (
        add_negatable_bool_argument,
        "--video-generation-worker",
        "DYN_SGL_VIDEO_GENERATION_WORKER",
        False,
        "Run as video generation worker for video generation (T2V/I2V).",
    ),
    (
        add_argument,
        "--video-generation-fs-url",
        "DYN_SGL_VIDEO_GENERATION_FS_URL",
        None,
        "Filesystem URL for storing generated videos using fsspec (e.g., s3://bucket/path, gs://bucket/path, file:///local/path). Supports any fsspec-compatible filesystem.",
    ),
)
for _register, _flag, _env, _default, _help in _sgl_flag_specs:
    # The group is passed positionally and everything else by keyword,
    # matching the helpers' existing call convention in this file.
    _register(
        g,
        flag_name=_flag,
        env_var=_env,
        default=_default,
        help=_help,
    )
classDynamoSGLangConfig(ConfigBase):
"""Configuration for Dynamo SGLang wrapper (SGLang-specific only)."""