help=f"Dynamo endpoint string in 'dyn://namespace.component.endpoint' format. Default: {DEFAULT_ENDPOINT} for decode/aggregated, {DEFAULT_PREFILL_ENDPOINT} for prefill workers, or {DEFAULT_ENCODE_ENDPOINT} for encode workers",
)
# Model location: the help text accepts either an on-disk path or a
# HuggingFace identifier; DEFAULT_MODEL_PATH is used when the flag is omitted.
model_path_kwargs = {
    "type": str,
    "default": DEFAULT_MODEL_PATH,
    "help": f"Path to disk model or HuggingFace model identifier to load. Default: {DEFAULT_MODEL_PATH}",
}
parser.add_argument("--model-path", **model_path_kwargs)
# Public name the model is served under. Per its help text, leaving it empty
# means the name is derived from the model path.
#
# NOTE(review): this call used to pass a second keyword,
#     help="Maximum number of beams for beam search decoding."
# A repeated keyword argument is a SyntaxError, so the module could not be
# imported at all. That text does not describe this option; it presumably
# belongs to a separate beam-width option whose definition is not visible
# here. Confirm and restore that option instead of re-adding the text below.
parser.add_argument(
    "--served-model-name",
    type=str,
    default="",
    help="Name to serve the model under. Defaults to deriving it from model path.",
)
# Engine tuning and telemetry flags. Each entry is (flag, add_argument kwargs);
# they are registered in the order listed so --help output keeps its ordering.
engine_option_specs = (
    (
        "--free-gpu-memory-fraction",
        {
            "type": float,
            "default": None,
            "help": "Free GPU memory fraction reserved for KV Cache, after allocating model weights and buffers.",
        },
    ),
    (
        "--extra-engine-args",
        {
            "type": str,
            "default": "",
            "help": "Path to a YAML file containing additional keyword arguments to pass to the TRTLLM engine.",
        },
    ),
    (
        "--override-engine-args",
        {
            "type": str,
            "default": "",
            "help": 'Python dictionary string to override specific engine arguments from the YAML file. Example: \'{"tensor_parallel_size": 2, "kv_cache_config": {"enable_block_reuse": false}}\'',
        },
    ),
    (
        # Plain switch: takes no value, False unless present on the command line.
        "--publish-events-and-metrics",
        {
            "action": "store_true",
            "help": "If set, publish events and metrics to the dynamo components.",
        },
    ),
)
for engine_flag, engine_kwargs in engine_option_specs:
    parser.add_argument(engine_flag, **engine_kwargs)
# Disaggregation mode selector. The accepted values are the `.value` of every
# member of DisaggregationMode.
# The choices expression had lost its whitespace
# (`mode.valueformodeinDisaggregationMode`), which is an attribute lookup on an
# undefined name rather than a comprehension; it is restored here.
parser.add_argument(
    "--disaggregation-mode",
    type=str,
    default=DEFAULT_DISAGGREGATION_MODE,
    choices=[mode.value for mode in DisaggregationMode],
    help=f"Mode to use for disaggregation. Default: {DEFAULT_DISAGGREGATION_MODE}",
)
def _parse_bool_flag(raw: str) -> bool:
    """Convert a command-line token such as 'true', 'no' or '0' into a bool.

    Raises:
        argparse.ArgumentTypeError: if the token is not a recognised boolean
            spelling, so argparse reports a normal usage error.
    """
    # Local import keeps this fix self-contained regardless of how the
    # surrounding module imports argparse.
    import argparse

    token = raw.strip().lower()
    if token in ("1", "true", "t", "yes", "y", "on"):
        return True
    # The empty string stays falsy, matching what bool("") returned before.
    if token in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {raw!r}")


# `type=bool` treated every non-empty string as True, so
# `--use-nixl-connect False` silently enabled the feature. The explicit parser
# fixes that. `nargs="?"` with `const=True` additionally lets the bare flag
# (`--use-nixl-connect`) mean True, while `--use-nixl-connect True/False`
# keeps working.
parser.add_argument(
    "--use-nixl-connect",
    type=_parse_bool_flag,
    nargs="?",
    const=True,
    default=False,
    help="Use NIXL Connect for communication between workers.",
)
# Model modality selector. The accepted values are the `.value` of every member
# of Modality; "text" is used when the flag is omitted.
# The choices expression had lost its whitespace (`m.valueforminModality`),
# which is an attribute lookup on an undefined name rather than a
# comprehension; it is restored here.
parser.add_argument(
    "--modality",
    type=str,
    default="text",
    choices=[m.value for m in Modality],
    help="Modality to use for the model. Default: text. "
    "Options: text (LLM), multimodal (VLM), video_diffusion.",
)
# Encode-worker and media-handling options as (flag, type, default, help) rows,
# registered in the order listed.
media_option_rows = (
    (
        "--encode-endpoint",
        str,
        "",
        f"Endpoint(in 'dyn://namespace.component.endpoint' format) for the encode worker. e.g. {DEFAULT_ENCODE_ENDPOINT}",
    ),
    (
        "--allowed-local-media-path",
        str,
        "",
        "Path to a directory that is allowed to be accessed by the model. Default: empty",
    ),
    (
        "--max-file-size-mb",
        int,
        50,
        "Maximum size of downloadable embedding files/Image URLs. Default: 50MB",
    ),
    (
        "--dyn-encoder-cache-capacity-gb",
        float,
        0,
        "Capacity of the encoder cache in GB for multimodal embeddings. Default: 0",
    ),
)
for media_flag, media_type, media_default, media_help in media_option_rows:
    parser.add_argument(
        media_flag,
        type=media_type,
        default=media_default,
        help=media_help,
    )
# To avoid name conflicts with different backends, dynamo-specific args adopt the "dyn-" prefix.
# Tool-call parser: valid names are whatever get_tool_parser_names() returns;
# the default is None.
tool_parser_kwargs = {
    "type": str,
    "default": None,
    "choices": get_tool_parser_names(),
    "help": "Tool call parser name for the model.",
}
parser.add_argument("--dyn-tool-call-parser", **tool_parser_kwargs)

# Reasoning parser: valid names come from get_reasoning_parser_names(). Per the
# help text, no reasoning parsing is performed when it is left unset.
reasoning_parser_kwargs = {
    "type": str,
    "default": None,
    "choices": get_reasoning_parser_names(),
    "help": "Reasoning parser name for the model. If not specified, no reasoning parsing is performed.",
}
parser.add_argument("--dyn-reasoning-parser", **reasoning_parser_kwargs)
# Connector selection, restricted to the two listed names; "none" is the default.
connector_kwargs = {
    "type": str,
    "default": "none",
    "choices": ["none", "kvbm"],
    "help": "Connector to use for the model.",
}
parser.add_argument("--connector", **connector_kwargs)

# Let the shared helper register its config-dump options on this parser. It is
# called between the two options, as before, so registration order is unchanged.
add_config_dump_args(parser)

# Optional chat-template override; None means no custom template was supplied.
jinja_template_kwargs = {
    "type": str,
    "default": None,
    "help": "Path to a custom Jinja template file to override the model's default chat template. This template will take precedence over any template found in the model repository.",
}
parser.add_argument("--custom-jinja-template", **jinja_template_kwargs)
# Comma-separated list of endpoint types to expose; both "chat" and
# "completions" are enabled by default.
#
# NOTE(review): this call used to pass `help=` three times, which is a
# SyntaxError (repeated keyword argument) and prevented the module from being
# imported. Only the help text that describes this option is kept. The other
# two texts presumably belong to separate options (a discovery-backend selector
# and a durable-KV-events switch) whose definitions are not visible here.
# Confirm and restore those options; their help texts are preserved verbatim:
#   1. "Discovery backend: kubernetes (K8s API), etcd (distributed KV), file
#      (local filesystem), mem (in-memory). Etcd uses the ETCD_* env vars (e.g.
#      ETCD_ENDPOINTS) for connection details. File uses root dir from env var
#      DYN_FILE_KV or defaults to $TMPDIR/dynamo_store_kv."
#   2. "Enable durable KV events using NATS JetStream instead of the local
#      indexer. By default, local indexer is enabled for lower latency. Use
#      this flag when you need durability and multi-replica router consistency.
#      Requires NATS with JetStream enabled. Can also be set via
#      DYN_DURABLE_KV_EVENTS=true env var."
parser.add_argument(
    "--dyn-endpoint-types",
    type=str,
    default="chat,completions",
    help="Comma-separated list of endpoint types to enable. Options: 'chat', 'completions'. Default: 'chat,completions'. Use 'completions' for models without chat templates.",
)
# Diffusion-specific options (only used when modality is video_diffusion or image_diffusion).
# They live in their own argument group so --help lists them under a separate heading.
diffusion_group = parser.add_argument_group(
    title="Diffusion Options [Experimental]",
    description="Options for video_diffusion modality",
)

# Value-taking diffusion options as (flag, type, default, help) rows,
# registered in the order listed.
diffusion_option_rows = (
    (
        "--output-dir",
        str,
        "/tmp/dynamo_videos",
        "Directory to store generated videos/images. Default: /tmp/dynamo_videos",
    ),
    (
        "--default-height",
        int,
        480,
        "Default video/image height in pixels. Default: 480",
    ),
    (
        "--default-width",
        int,
        832,
        "Default video/image width in pixels. Default: 832",
    ),
    (
        "--default-num-frames",
        int,
        81,
        "Default number of frames for video generation. Default: 81",
    ),
    (
        "--default-num-inference-steps",
        int,
        50,
        "Default number of inference steps. Default: 50",
    ),
    (
        "--default-guidance-scale",
        float,
        5.0,
        "Default CFG guidance scale. Default: 5.0",
    ),
)
for diffusion_flag, diffusion_type, diffusion_default, diffusion_help in diffusion_option_rows:
    diffusion_group.add_argument(
        diffusion_flag,
        type=diffusion_type,
        default=diffusion_default,
        help=diffusion_help,
    )
diffusion_group.add_argument(
"--enable-teacache",
action="store_true",
help="Enable TeaCache optimization for faster generation.",